From a3e72b75392834190cf00e7dee9a816cff6157c0 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 8 Dec 2022 15:48:36 -0500 Subject: [PATCH 01/64] Update build dependencies and mypy-protobuf --- sdks/python/build-requirements.txt | 17 +++-------------- sdks/python/setup.py | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/sdks/python/build-requirements.txt b/sdks/python/build-requirements.txt index bab18e1eee5e..067528cd99d1 100644 --- a/sdks/python/build-requirements.txt +++ b/sdks/python/build-requirements.txt @@ -18,22 +18,11 @@ # TODO(https://github.com/apache/beam/issues/20051): Consider PEP-517/PEP-518 instead of this file. setuptools -# grpcio-tools depends on grpcio and the grpcio>1.50.0 results in error(ImportModuleError six) -# when installing Apache Beam source via pip install -e . -# Adding six as part of build dependencies. -# https://github.com/apache/beam/issues/24432 -six wheel>=0.36.0 - -grpcio-tools==1.37.0 -# TODO(https://github.com/apache/beam/issues/23734): the sdist for grpcio==1.50.0 is failing on GH workers -# pin grpcio to the previous version. -grpcio==1.49.1;sys_platform=="darwin" -mypy-protobuf==1.18 -protobuf==3.19.4;python_version=="3.10" and sys_platform=="darwin" - +grpcio-tools==1.51.1 +mypy-protobuf==3.4.0 # Avoid https://github.com/pypa/virtualenv/issues/2006 -distlib==0.3.1 +distlib==0.3.6 # Numpy headers numpy>=1.14.3,<1.25 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 7525177a5b26..61b5df0d7f52 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -180,15 +180,17 @@ def get_portability_package_data(): generate_protos_first() # Keep all dependencies inlined in the setup call, otherwise Dependabot won't # be able to parse it. 
- if sys.platform == 'darwin' and ( - sys.version_info.major == 3 and sys.version_info.minor == 10): - # TODO (https://github.com/apache/beam/issues/23585): Protobuf wheels - # for version 3.19.5, 3.19.6 and 3.20.x on Python 3.10 and MacOS are - # rolled back due to some errors on MacOS. So, for Python 3.10 on MacOS - # restrict the protobuf with tight upper bound(3.19.4) - protobuf_dependency = ['protobuf>3.12.2,<3.19.5'] - else: - protobuf_dependency = ['protobuf>3.12.2,<4'] + # if sys.platform == 'darwin' and ( + # sys.version_info.major == 3 and sys.version_info.minor == 10): + # # TODO (https://github.com/apache/beam/issues/23585): Protobuf wheels + # # for version 3.19.5, 3.19.6 and 3.20.x on Python 3.10 and MacOS are + # # rolled back due to some errors on MacOS. So, for Python 3.10 on MacOS + # # restrict the protobuf with tight upper bound(3.19.4) + # protobuf_dependency = ['protobuf>3.12.2,<3.19.5'] + # else: + # protobuf_dependency = ['protobuf>3.12.2,<4'] + + protobuf_dependency = ['protobuf>=4.21.1,<=4.21.11'] setuptools.setup( name=PACKAGE_NAME, From 673aaf3738ae33808c816bb8cbac9e04c79a4e54 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 28 Dec 2022 10:52:54 -0500 Subject: [PATCH 02/64] Update import --- sdks/python/gen_protos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 86d94d937e83..55b3985a1d29 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -123,7 +123,7 @@ def generate_urn_files(out_dir, api_path): This is executed at build time rather than dynamically on import to ensure that it is compatible with static type checkers like mypy. 
""" - import google.protobuf.pyext._message as pyext_message + from google._upb import _message as pyext_message from google.protobuf import message class Context(object): From ee1da36662487c85cba996a037a0c540772d82ad Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Tue, 14 Feb 2023 15:47:17 -0500 Subject: [PATCH 03/64] Add os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' --- sdks/python/gen_protos.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 55b3985a1d29..fb5ece0b8fb1 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -37,6 +37,7 @@ LOG = logging.getLogger() LOG.setLevel(logging.INFO) +os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' LICENSE_HEADER = """ # From 6140ff747e8b7dbcdecd226f7816852de1af4ee0 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 16 Feb 2023 14:13:06 -0500 Subject: [PATCH 04/64] Update gen_protos.py --- sdks/python/gen_protos.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index fb5ece0b8fb1..1666ecd7015d 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -33,6 +33,7 @@ from collections import defaultdict from importlib import import_module +from google.protobuf.internal import containers import pkg_resources LOG = logging.getLogger() @@ -124,7 +125,6 @@ def generate_urn_files(out_dir, api_path): This is executed at build time rather than dynamically on import to ensure that it is compatible with static type checkers like mypy. 
""" - from google._upb import _message as pyext_message from google.protobuf import message class Context(object): @@ -180,8 +180,9 @@ def python_repr(self, obj): obj, ( list, - pyext_message.RepeatedCompositeContainer, # pylint: disable=c-extension-no-member - pyext_message.RepeatedScalarContainer)): # pylint: disable=c-extension-no-member + containers.RepeatedScalarFieldContainer, + containers.RepeatedCompositeFieldContainer + )): # pylint: disable=c-extension-no-member return '[%s]' % ', '.join(self.python_repr(x) for x in obj) else: return repr(obj) @@ -233,6 +234,8 @@ def write_message(self, message_name, message, indent=0): with ctx.indent(): for obj_name, obj in inspect.getmembers(message): + if 'MonitoringInfoSpecs' in str(message): + pass if self.is_message_type(obj): ctx.lines += self.write_message(obj_name, obj, ctx._indent) elif self.is_enum_type(obj): @@ -253,6 +256,8 @@ def write_message(self, message_name, message, indent=0): for pb2_file in pb2_files: modname = os.path.splitext(pb2_file)[0] + if 'metric' in modname: + pass out_file = modname + '_urns.py' api_start_idx = modname.index(os.path.sep + 'api' + os.path.sep) import_path = modname[api_start_idx + 1:].replace(os.path.sep, '.') From 8f24747bb0376812a670d5432412152997d2dacc Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 16 Feb 2023 15:23:11 -0500 Subject: [PATCH 05/64] Manually generate proto file. --- .../coders/proto2_coder_test_messages_pb2.py | 338 ++---------------- 1 file changed, 28 insertions(+), 310 deletions(-) diff --git a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py index 97ae8be9bfb1..547914f04a1f 100644 --- a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py +++ b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py @@ -1,27 +1,10 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! -# source: apache_beam/coders/proto2_coder_test_messages.proto - +# source: proto2_coder_test_messages.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database # @@protoc_insertion_point(imports) @@ -30,293 +13,28 @@ -DESCRIPTOR = _descriptor.FileDescriptor( - name='apache_beam/coders/proto2_coder_test_messages.proto', - package='proto2_coder_test_messages', - syntax='proto2', - serialized_options=b'\n\'org.apache.beam.sdk.extensions.protobuf', - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n3apache_beam/coders/proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 
\x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf' -) - - -FIELD1_FIELD_NUMBER = 101 -field1 = _descriptor.FieldDescriptor( - name='field1', full_name='proto2_coder_test_messages.field1', index=0, - number=101, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=True, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key) -FIELD2_FIELD_NUMBER = 102 -field2 = _descriptor.FieldDescriptor( - name='field2', full_name='proto2_coder_test_messages.field2', index=1, - number=102, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=True, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key) - - -_MESSAGEA = _descriptor.Descriptor( - name='MessageA', - full_name='proto2_coder_test_messages.MessageA', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='field1', full_name='proto2_coder_test_messages.MessageA.field1', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, 
default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='field2', full_name='proto2_coder_test_messages.MessageA.field2', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=83, - serialized_end=163, -) - - -_MESSAGEB = _descriptor.Descriptor( - name='MessageB', - full_name='proto2_coder_test_messages.MessageB', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='field1', full_name='proto2_coder_test_messages.MessageB.field1', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=165, - serialized_end=191, -) - - -_MESSAGEC = _descriptor.Descriptor( - name='MessageC', - full_name='proto2_coder_test_messages.MessageC', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - 
serialized_options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(100, 106), ], - oneofs=[ - ], - serialized_start=193, - serialized_end=209, -) - - -_MESSAGEWITHMAP_FIELD1ENTRY = _descriptor.Descriptor( - name='Field1Entry', - full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='key', full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry.key', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='value', full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry.value', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=b'8\001', - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=302, - serialized_end=385, -) - -_MESSAGEWITHMAP = _descriptor.Descriptor( - name='MessageWithMap', - full_name='proto2_coder_test_messages.MessageWithMap', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='field1', full_name='proto2_coder_test_messages.MessageWithMap.field1', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[_MESSAGEWITHMAP_FIELD1ENTRY, ], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=212, - serialized_end=385, -) - - -_REFERENCESMESSAGEWITHMAP = _descriptor.Descriptor( - name='ReferencesMessageWithMap', - full_name='proto2_coder_test_messages.ReferencesMessageWithMap', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='field1', full_name='proto2_coder_test_messages.ReferencesMessageWithMap.field1', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=387, - serialized_end=473, -) - -_MESSAGEA.fields_by_name['field2'].message_type = _MESSAGEB -_MESSAGEWITHMAP_FIELD1ENTRY.fields_by_name['value'].message_type = _MESSAGEA -_MESSAGEWITHMAP_FIELD1ENTRY.containing_type = _MESSAGEWITHMAP -_MESSAGEWITHMAP.fields_by_name['field1'].message_type = _MESSAGEWITHMAP_FIELD1ENTRY -_REFERENCESMESSAGEWITHMAP.fields_by_name['field1'].message_type = _MESSAGEWITHMAP -DESCRIPTOR.message_types_by_name['MessageA'] = _MESSAGEA -DESCRIPTOR.message_types_by_name['MessageB'] = _MESSAGEB -DESCRIPTOR.message_types_by_name['MessageC'] = _MESSAGEC -DESCRIPTOR.message_types_by_name['MessageWithMap'] = _MESSAGEWITHMAP -DESCRIPTOR.message_types_by_name['ReferencesMessageWithMap'] = 
_REFERENCESMESSAGEWITHMAP -DESCRIPTOR.extensions_by_name['field1'] = field1 -DESCRIPTOR.extensions_by_name['field2'] = field2 -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -MessageA = _reflection.GeneratedProtocolMessageType('MessageA', (_message.Message,), { - 'DESCRIPTOR' : _MESSAGEA, - '__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageA) - }) -_sym_db.RegisterMessage(MessageA) - -MessageB = _reflection.GeneratedProtocolMessageType('MessageB', (_message.Message,), { - 'DESCRIPTOR' : _MESSAGEB, - '__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageB) - }) -_sym_db.RegisterMessage(MessageB) - -MessageC = _reflection.GeneratedProtocolMessageType('MessageC', (_message.Message,), { - 'DESCRIPTOR' : _MESSAGEC, - '__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageC) - }) -_sym_db.RegisterMessage(MessageC) - -MessageWithMap = _reflection.GeneratedProtocolMessageType('MessageWithMap', (_message.Message,), { - - 'Field1Entry' : _reflection.GeneratedProtocolMessageType('Field1Entry', (_message.Message,), { - 'DESCRIPTOR' : _MESSAGEWITHMAP_FIELD1ENTRY, - '__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageWithMap.Field1Entry) - }) - , - 'DESCRIPTOR' : _MESSAGEWITHMAP, - '__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageWithMap) - }) -_sym_db.RegisterMessage(MessageWithMap) -_sym_db.RegisterMessage(MessageWithMap.Field1Entry) - -ReferencesMessageWithMap = _reflection.GeneratedProtocolMessageType('ReferencesMessageWithMap', (_message.Message,), { - 'DESCRIPTOR' : _REFERENCESMESSAGEWITHMAP, - '__module__' : 
'apache_beam.coders.proto2_coder_test_messages_pb2' - # @@protoc_insertion_point(class_scope:proto2_coder_test_messages.ReferencesMessageWithMap) - }) -_sym_db.RegisterMessage(ReferencesMessageWithMap) - -field1.message_type = _MESSAGEA -MessageC.RegisterExtension(field1) -field2.message_type = _MESSAGEB -MessageC.RegisterExtension(field2) - -DESCRIPTOR._options = None -_MESSAGEWITHMAP_FIELD1ENTRY._options = None +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'proto2_coder_test_messages_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + MessageC.RegisterExtension(field1) + MessageC.RegisterExtension(field2) + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\'org.apache.beam.sdk.extensions.protobuf' + _MESSAGEWITHMAP_FIELD1ENTRY._options = None + _MESSAGEWITHMAP_FIELD1ENTRY._serialized_options = b'8\001' + 
_MESSAGEA._serialized_start=64 + _MESSAGEA._serialized_end=144 + _MESSAGEB._serialized_start=146 + _MESSAGEB._serialized_end=172 + _MESSAGEC._serialized_start=174 + _MESSAGEC._serialized_end=190 + _MESSAGEWITHMAP._serialized_start=193 + _MESSAGEWITHMAP._serialized_end=366 + _MESSAGEWITHMAP_FIELD1ENTRY._serialized_start=283 + _MESSAGEWITHMAP_FIELD1ENTRY._serialized_end=366 + _REFERENCESMESSAGEWITHMAP._serialized_start=368 + _REFERENCESMESSAGEWITHMAP._serialized_end=454 # @@protoc_insertion_point(module_scope) From e6af718bbbd09c3e0740d0dd1f3c90a139ead55c Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 2023 15:33:43 +0000 Subject: [PATCH 06/64] update proto file --- .../coders/proto2_coder_test_messages_pb2.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py index 547914f04a1f..88a6f15dd8c9 100644 --- a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py +++ b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: proto2_coder_test_messages.proto +# source: apache_beam/coders/proto2_coder_test_messages.proto """Generated protocol buffer code.""" from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor @@ -13,10 +13,10 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n3apache_beam/coders/proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 
\x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf') _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'proto2_coder_test_messages_pb2', globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'apache_beam.coders.proto2_coder_test_messages_pb2', globals()) if _descriptor._USE_C_DESCRIPTORS == False: MessageC.RegisterExtension(field1) MessageC.RegisterExtension(field2) @@ -25,16 +25,16 @@ DESCRIPTOR._serialized_options = b'\n\'org.apache.beam.sdk.extensions.protobuf' _MESSAGEWITHMAP_FIELD1ENTRY._options = None _MESSAGEWITHMAP_FIELD1ENTRY._serialized_options = b'8\001' - _MESSAGEA._serialized_start=64 - _MESSAGEA._serialized_end=144 - _MESSAGEB._serialized_start=146 - _MESSAGEB._serialized_end=172 - _MESSAGEC._serialized_start=174 - _MESSAGEC._serialized_end=190 - _MESSAGEWITHMAP._serialized_start=193 - _MESSAGEWITHMAP._serialized_end=366 - _MESSAGEWITHMAP_FIELD1ENTRY._serialized_start=283 - _MESSAGEWITHMAP_FIELD1ENTRY._serialized_end=366 - _REFERENCESMESSAGEWITHMAP._serialized_start=368 - _REFERENCESMESSAGEWITHMAP._serialized_end=454 + _MESSAGEA._serialized_start=83 + _MESSAGEA._serialized_end=163 + _MESSAGEB._serialized_start=165 + _MESSAGEB._serialized_end=191 + _MESSAGEC._serialized_start=193 + _MESSAGEC._serialized_end=209 + _MESSAGEWITHMAP._serialized_start=212 + _MESSAGEWITHMAP._serialized_end=385 + _MESSAGEWITHMAP_FIELD1ENTRY._serialized_start=302 + _MESSAGEWITHMAP_FIELD1ENTRY._serialized_end=385 + _REFERENCESMESSAGEWITHMAP._serialized_start=387 + 
_REFERENCESMESSAGEWITHMAP._serialized_end=473 # @@protoc_insertion_point(module_scope) From 9c441412cea3845448c2bbfe77112fe2b00e24d9 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 2023 11:18:14 -0500 Subject: [PATCH 07/64] Remove redundant code --- sdks/python/gen_protos.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 1666ecd7015d..2b3c272a92c5 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -234,8 +234,6 @@ def write_message(self, message_name, message, indent=0): with ctx.indent(): for obj_name, obj in inspect.getmembers(message): - if 'MonitoringInfoSpecs' in str(message): - pass if self.is_message_type(obj): ctx.lines += self.write_message(obj_name, obj, ctx._indent) elif self.is_enum_type(obj): @@ -256,8 +254,6 @@ def write_message(self, message_name, message, indent=0): for pb2_file in pb2_files: modname = os.path.splitext(pb2_file)[0] - if 'metric' in modname: - pass out_file = modname + '_urns.py' api_start_idx = modname.index(os.path.sep + 'api' + os.path.sep) import_path = modname[api_start_idx + 1:].replace(os.path.sep, '.') From 5860711c3ec5d40419ed5c943be6ae6bc913aa45 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 2023 11:18:54 -0500 Subject: [PATCH 08/64] Add encoding --- sdks/python/gen_protos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 2b3c272a92c5..06efe2a1ec65 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -538,7 +538,7 @@ def generate_proto_files(force=False): ('_pb2.py', '_pb2_grpc.py', '_pb2.pyi')): proto_packages.add(os.path.dirname(file_path)) lines = [] - with open(file_path) as f: + with open(file_path, encoding='utf-8') as f: for line in f: match_obj = compiled_import_re.match(line) if match_obj and \ From fd4176f02e0810990f68515d8fa52a6ad6e2b6d2 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 
2023 11:51:15 -0500 Subject: [PATCH 09/64] Update protobuf --- sdks/python/setup.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 61b5df0d7f52..35ab4ad43866 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -180,18 +180,6 @@ def get_portability_package_data(): generate_protos_first() # Keep all dependencies inlined in the setup call, otherwise Dependabot won't # be able to parse it. - # if sys.platform == 'darwin' and ( - # sys.version_info.major == 3 and sys.version_info.minor == 10): - # # TODO (https://github.com/apache/beam/issues/23585): Protobuf wheels - # # for version 3.19.5, 3.19.6 and 3.20.x on Python 3.10 and MacOS are - # # rolled back due to some errors on MacOS. So, for Python 3.10 on MacOS - # # restrict the protobuf with tight upper bound(3.19.4) - # protobuf_dependency = ['protobuf>3.12.2,<3.19.5'] - # else: - # protobuf_dependency = ['protobuf>3.12.2,<4'] - - protobuf_dependency = ['protobuf>=4.21.1,<=4.21.11'] - setuptools.setup( name=PACKAGE_NAME, version=PACKAGE_VERSION, @@ -229,7 +217,7 @@ def get_portability_package_data(): 'apache_beam/utils/counters.py', 'apache_beam/utils/windowed_value.py', ]), - install_requires= protobuf_dependency + [ + install_requires = [ 'crcmod>=1.7,<2.0', 'orjson<4.0', # Dill doesn't have forwards-compatibility guarantees within minor @@ -254,6 +242,7 @@ def get_portability_package_data(): 'objsize>=0.6.1,<0.7.0', 'pymongo>=3.8.0,<4.0.0', 'proto-plus>=1.7.1,<2', + 'protobuf>=4.00,<=5', 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', From 2cf5526e3e6f16a5b1003f537f7381c9c472f802 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 2023 14:15:32 -0500 Subject: [PATCH 10/64] Remove tensorflow and google cloud profiler --- sdks/python/container/base_image_requirements_manual.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index b3612fa6af39..3722841507f2 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -32,9 +32,6 @@ cython<1 # some versions of libraries that launch Beam pipelines, like tensorflow-transform. # Leaving 'future' in our containers for now prevent breaking tft users. future -# TODO: Remove the upper bound once Tensorflow 2.11 is released. -# https://github.com/apache/beam/issues/23355 -google-cloud-profiler<4.0.0 guppy3 # Memory profiler mmh3 # Optimizes execution of some Beam codepaths. TODO: Make it Beam's dependency. nltk # Commonly used for natural language processing. @@ -42,4 +39,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy # Optimizes execution of some Beam codepaths. scipy scikit-learn -tensorflow +protobuf From afbe42df2b18a5b9a697e42de6f05edd0fd73ddc Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 17 Feb 2023 15:02:10 -0500 Subject: [PATCH 11/64] update dependencies --- .../py310/base_image_requirements.txt | 71 ++++++------------ .../py37/base_image_requirements.txt | 71 ++++++------------ .../py38/base_image_requirements.txt | 73 ++++++------------- .../py39/base_image_requirements.txt | 71 ++++++------------ sdks/python/setup.py | 14 ++-- 5 files changed, 98 insertions(+), 202 deletions(-) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 8b3134ecc591..d825efbaa003 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -21,10 +21,8 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
-absl-py==1.4.0 -astunparse==1.6.3 attrs==22.2.0 -beautifulsoup4==4.11.1 +beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 @@ -33,7 +31,7 @@ charset-normalizer==3.0.1 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.0 +cryptography==39.0.1 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -41,62 +39,49 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.0 +fastavro==1.7.1 fasteners==0.18 -flatbuffers==23.1.21 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.0 -google-api-python-client==2.74.0 google-apitools==0.5.31 google-auth==2.16.0 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 -google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery==3.5.0 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 -google-cloud-datastore==1.15.5 +google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 -google-cloud-pubsub==2.14.0 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 -google-cloud-spanner==3.27.0 -google-cloud-videointelligence==1.16.3 +google-cloud-language==2.9.0 +google-cloud-pubsub==2.14.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 +google-cloud-spanner==3.27.1 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 -google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -greenlet==2.0.1 +greenlet==2.0.2 grpc-google-iam-v1==0.12.6 grpcio==1.51.1 -grpcio-status==1.48.2 +grpcio-status==1.51.1 guppy3==3.1.2 -h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.65.0 +hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 joblib==1.2.0 -keras==2.11.0 -libclang==15.0.6.1 -Markdown==3.4.1 -MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.0 +numpy==1.24.2 oauth2client==4.1.3 
-oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.8.5 +orjson==3.8.6 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -104,7 +89,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==3.19.6 +protobuf==4.22.0 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -117,7 +102,7 @@ pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 pytest==7.2.1 -pytest-forked==1.4.0 +pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 python-dateutil==2.8.2 @@ -127,31 +112,21 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 scipy==1.10.0 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.3.2.post1 +soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 -tensorboard==2.11.2 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.1 -tensorflow==2.11.0 -tensorflow-estimator==2.11.0 -tensorflow-io-gcs-filesystem==0.30.0 -termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 tqdm==4.64.1 -typing_extensions==4.4.0 -uritemplate==4.1.1 +typing_extensions==4.5.0 urllib3==1.26.14 -websocket-client==1.4.2 -Werkzeug==2.2.2 +websocket-client==1.5.1 wrapt==1.14.1 zstandard==0.19.0 diff --git a/sdks/python/container/py37/base_image_requirements.txt b/sdks/python/container/py37/base_image_requirements.txt index 73866fba4a81..3c67855111bb 100644 --- a/sdks/python/container/py37/base_image_requirements.txt +++ b/sdks/python/container/py37/base_image_requirements.txt @@ -21,10 +21,8 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
-absl-py==1.4.0 -astunparse==1.6.3 attrs==22.2.0 -beautifulsoup4==4.11.1 +beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 @@ -33,7 +31,7 @@ charset-normalizer==3.0.1 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.0 +cryptography==39.0.1 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -41,63 +39,50 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.0 +fastavro==1.7.1 fasteners==0.18 -flatbuffers==23.1.21 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.0 -google-api-python-client==2.74.0 google-apitools==0.5.31 google-auth==2.16.0 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 -google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery==3.5.0 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 -google-cloud-datastore==1.15.5 +google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 -google-cloud-pubsub==2.14.0 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 -google-cloud-spanner==3.27.0 -google-cloud-videointelligence==1.16.3 +google-cloud-language==2.9.0 +google-cloud-pubsub==2.14.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 +google-cloud-spanner==3.27.1 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 -google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -greenlet==2.0.1 +greenlet==2.0.2 grpc-google-iam-v1==0.12.6 grpcio==1.51.1 -grpcio-status==1.48.2 +grpcio-status==1.51.1 guppy3==3.1.2 -h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.65.0 +hypothesis==6.68.2 idna==3.4 importlib-metadata==6.0.0 iniconfig==2.0.0 joblib==1.2.0 -keras==2.11.0 -libclang==15.0.6.1 -Markdown==3.4.1 -MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 numpy==1.21.6 
oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.8.5 +orjson==3.8.6 overrides==6.5.0 packaging==23.0 pandas==1.3.5 @@ -105,7 +90,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==3.19.6 +protobuf==4.22.0 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -118,7 +103,7 @@ pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 pytest==7.2.1 -pytest-forked==1.4.0 +pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 python-dateutil==2.8.2 @@ -128,32 +113,22 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.0.2 scipy==1.7.3 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.3.2.post1 +soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 -tensorboard==2.11.2 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.1 -tensorflow==2.11.0 -tensorflow-estimator==2.11.0 -tensorflow-io-gcs-filesystem==0.30.0 -termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 tqdm==4.64.1 -typing_extensions==4.4.0 -uritemplate==4.1.1 +typing_extensions==4.5.0 urllib3==1.26.14 -websocket-client==1.4.2 -Werkzeug==2.2.2 +websocket-client==1.5.1 wrapt==1.14.1 -zipp==3.11.0 +zipp==3.13.0 zstandard==0.19.0 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index 81b451dab56a..0c4f34314ea4 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -21,10 +21,8 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
-absl-py==1.4.0 -astunparse==1.6.3 attrs==22.2.0 -beautifulsoup4==4.11.1 +beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 @@ -33,7 +31,7 @@ charset-normalizer==3.0.1 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.0 +cryptography==39.0.1 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -41,63 +39,49 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.0 +fastavro==1.7.1 fasteners==0.18 -flatbuffers==23.1.21 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.0 -google-api-python-client==2.74.0 google-apitools==0.5.31 google-auth==2.16.0 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 -google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery==3.5.0 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 -google-cloud-datastore==1.15.5 +google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 -google-cloud-pubsub==2.14.0 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 -google-cloud-spanner==3.27.0 -google-cloud-videointelligence==1.16.3 +google-cloud-language==2.9.0 +google-cloud-pubsub==2.14.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 +google-cloud-spanner==3.27.1 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 -google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -greenlet==2.0.1 +greenlet==2.0.2 grpc-google-iam-v1==0.12.6 grpcio==1.51.1 -grpcio-status==1.48.2 +grpcio-status==1.51.1 guppy3==3.1.2 -h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.65.0 +hypothesis==6.68.2 idna==3.4 -importlib-metadata==6.0.0 iniconfig==2.0.0 joblib==1.2.0 -keras==2.11.0 -libclang==15.0.6.1 -Markdown==3.4.1 -MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.0 +numpy==1.24.2 
oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.8.5 +orjson==3.8.6 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -105,7 +89,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==3.19.6 +protobuf==4.22.0 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -118,7 +102,7 @@ pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 pytest==7.2.1 -pytest-forked==1.4.0 +pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 python-dateutil==2.8.2 @@ -128,32 +112,21 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 scipy==1.10.0 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.3.2.post1 +soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 -tensorboard==2.11.2 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.1 -tensorflow==2.11.0 -tensorflow-estimator==2.11.0 -tensorflow-io-gcs-filesystem==0.30.0 -termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 tqdm==4.64.1 -typing_extensions==4.4.0 -uritemplate==4.1.1 +typing_extensions==4.5.0 urllib3==1.26.14 -websocket-client==1.4.2 -Werkzeug==2.2.2 +websocket-client==1.5.1 wrapt==1.14.1 -zipp==3.11.0 zstandard==0.19.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index fa18cdab35f0..4ed0914dd5bf 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -21,10 +21,8 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
-absl-py==1.4.0 -astunparse==1.6.3 attrs==22.2.0 -beautifulsoup4==4.11.1 +beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 @@ -41,63 +39,49 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.0 +fastavro==1.7.1 fasteners==0.18 -flatbuffers==23.1.21 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.0 -google-api-python-client==2.74.0 google-apitools==0.5.31 google-auth==2.16.0 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 -google-cloud-bigquery==3.4.2 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery==3.5.0 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 -google-cloud-datastore==1.15.5 +google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 -google-cloud-pubsub==2.14.0 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 -google-cloud-spanner==3.27.0 -google-cloud-videointelligence==1.16.3 +google-cloud-language==2.9.0 +google-cloud-pubsub==2.14.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 +google-cloud-spanner==3.27.1 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 -google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 -greenlet==2.0.1 +greenlet==2.0.2 grpc-google-iam-v1==0.12.6 grpcio==1.51.1 -grpcio-status==1.48.2 +grpcio-status==1.51.1 guppy3==3.1.2 -h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.65.0 +hypothesis==6.68.2 idna==3.4 -importlib-metadata==6.0.0 iniconfig==2.0.0 joblib==1.2.0 -keras==2.11.0 -libclang==15.0.6.1 -Markdown==3.4.1 -MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.0 +numpy==1.24.2 oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.8.5 +orjson==3.8.6 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -105,7 +89,7 @@ 
parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==3.19.6 +protobuf==4.22.0 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -118,7 +102,7 @@ pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 pytest==7.2.1 -pytest-forked==1.4.0 +pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 python-dateutil==2.8.2 @@ -128,32 +112,21 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 scipy==1.10.0 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.3.2.post1 +soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 -tensorboard==2.11.2 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.1 -tensorflow==2.11.0 -tensorflow-estimator==2.11.0 -tensorflow-io-gcs-filesystem==0.30.0 -termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 tqdm==4.64.1 -typing_extensions==4.4.0 -uritemplate==4.1.1 +typing_extensions==4.5.0 urllib3==1.26.14 -websocket-client==1.4.2 -Werkzeug==2.2.2 +websocket-client==1.5.1 wrapt==1.14.1 -zipp==3.11.0 zstandard==0.19.0 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 35ab4ad43866..98cec9fad46d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -291,21 +291,21 @@ def get_portability_package_data(): # https://github.com/googleapis/google-cloud-python/issues/10566 'google-auth>=1.18.0,<3', 'google-auth-httplib2>=0.1.0,<0.2.0', - 'google-cloud-datastore>=1.8.0,<2', + 'google-cloud-datastore>=1.8.0,<3', 'google-cloud-pubsub>=2.1.0,<3', 'google-cloud-pubsublite>=1.2.0,<2', # GCP packages required by tests 'google-cloud-bigquery>=1.6.0,<4', - 'google-cloud-bigquery-storage>=2.6.3,<2.17', + 'google-cloud-bigquery-storage>=2.6.3,<2.19', 'google-cloud-core>=0.28.1,<3', - 'google-cloud-bigtable>=0.31.1,<2', + 'google-cloud-bigtable>=0.31.1,<3', 'google-cloud-spanner>=3.0.0,<4', # GCP Packages required by ML functionality - 'google-cloud-dlp>=3.0.0,<4', - 'google-cloud-language>=1.3.0,<2', - 
'google-cloud-videointelligence>=1.8.0,<2', + 'google-cloud-dlp>=3.0.0,<4.0', + 'google-cloud-language>=1.3.0,<3.0', + 'google-cloud-videointelligence>=1.8.0,<3.0', 'google-cloud-vision>=2,<4', - 'google-cloud-recommendations-ai>=0.1.0,<0.8.0' + 'google-cloud-recommendations-ai>=0.1.0,<1.0' ], 'interactive': [ 'facets-overview>=1.0.0,<2', From ab20544d47614376351a169044154347875b74b1 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sat, 18 Feb 2023 11:44:45 -0500 Subject: [PATCH 12/64] update facets-overview --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 98cec9fad46d..857a96f8a91e 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -308,7 +308,7 @@ def get_portability_package_data(): 'google-cloud-recommendations-ai>=0.1.0,<1.0' ], 'interactive': [ - 'facets-overview>=1.0.0,<2', + 'facets-overview>=1.1.0,<2', 'google-cloud-dataproc>=3.0.0,<3.2.0', # IPython>=8 is not compatible with Python<=3.7 'ipython>=7,<8;python_version<="3.7"', From 990cf94e9f278571872b2a242999f2c37ea3ac26 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sun, 19 Feb 2023 12:28:38 -0500 Subject: [PATCH 13/64] Update google-cloud-language code to support breaking changes from v2.0 --- .../apache_beam/ml/gcp/naturallanguageml.py | 16 ++++++++-------- .../apache_beam/ml/gcp/naturallanguageml_test.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py index 7817eb9c4c23..ecc4bfdafd85 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py @@ -26,8 +26,7 @@ try: from google.cloud import language - from google.cloud.language import enums # pylint: disable=unused-import - from google.cloud.language import types + from google.cloud import language_v1 except ImportError: raise ImportError( 'Google Cloud
Natural Language API not supported for this execution ' @@ -42,7 +41,7 @@ class Document(object): Args: content (str): The content of the input or the Google Cloud Storage URI where the file is stored. - type (`Union[str, google.cloud.language.enums.Document.Type]`): Text type. + type (`Union[str, google.cloud.language_v1.Document.Type]`): Text type. Possible values are `HTML`, `PLAIN_TEXT`. The default value is `PLAIN_TEXT`. language_hint (`Optional[str]`): The language of the text. If not specified, @@ -57,7 +56,7 @@ class Document(object): def __init__( self, content, # type: str - type='PLAIN_TEXT', # type: Union[str, enums.Document.Type] + type='PLAIN_TEXT', # type: Union[str, language_v1.Document.Type] language_hint=None, # type: Optional[str] encoding='UTF8', # type: Optional[str] from_gcs=False # type: bool @@ -84,7 +83,7 @@ def to_dict(document): @beam.ptransform_fn def AnnotateText( pcoll, # type: beam.pvalue.PCollection - features, # type: Union[Mapping[str, bool], types.AnnotateTextRequest.Features] + features, # type: Union[Mapping[str, bool], language_v1.AnnotateTextRequest.Features] timeout=None, # type: Optional[float] metadata=None # type: Optional[Sequence[Tuple[str, str]]] ): @@ -95,7 +94,8 @@ def AnnotateText( Args: pcoll (:class:`~apache_beam.pvalue.PCollection`): An input PCollection of :class:`Document` objects. 
- features (`Union[Mapping[str, bool], types.AnnotateTextRequest.Features]`): + features (`Union[Mapping[str, bool], + language_v1.AnnotateTextRequest.Features]`): A dictionary of natural language operations to be performed on given text in the following format:: @@ -111,11 +111,11 @@ def AnnotateText( @beam.typehints.with_input_types(Document) -@beam.typehints.with_output_types(types.AnnotateTextResponse) +@beam.typehints.with_output_types(language_v1.AnnotateTextResponse) class _AnnotateTextFn(beam.DoFn): def __init__( self, - features, # type: Union[Mapping[str, bool], types.AnnotateTextRequest.Features] + features, # type: Union[Mapping[str, bool], language_v1.AnnotateTextRequest.Features] timeout, # type: Optional[float] metadata=None # type: Optional[Sequence[Tuple[str, str]]] ): diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py index e6395176091f..bad7443d0d94 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml_test.py @@ -65,7 +65,7 @@ def test_annotate_test_called(self): '._get_api_client'): p = TestPipeline() features = [ - naturallanguageml.types.AnnotateTextRequest.Features( + naturallanguageml.language_v1.AnnotateTextRequest.Features( extract_syntax=True) ] _ = ( From 7eab9258c6c1db36c036bbb186519c85f6c53c43 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sun, 19 Feb 2023 14:35:20 -0500 Subject: [PATCH 14/64] Update video intelligence breaking changes --- .../apache_beam/ml/gcp/videointelligenceml.py | 18 +++++++++--------- .../ml/gcp/videointelligenceml_test.py | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sdks/python/apache_beam/ml/gcp/videointelligenceml.py b/sdks/python/apache_beam/ml/gcp/videointelligenceml.py index fb0d7f045dde..ebd35d2426c0 100644 --- a/sdks/python/apache_beam/ml/gcp/videointelligenceml.py +++ b/sdks/python/apache_beam/ml/gcp/videointelligenceml.py 
@@ -63,7 +63,7 @@ def __init__( context_side_input=None): """ Args: - features: (List[``videointelligence_v1.enums.Feature``]) Required. + features: (List[``videointelligence_v1.Feature``]) Required. The Video Intelligence API features to detect location_id: (str) Optional. Cloud region where annotation should take place. @@ -82,9 +82,9 @@ def __init__( video_contexts = [('gs://cloud-samples-data/video/cat.mp4', Union[dict, - ``videointelligence_v1.types.VideoContext``]), + ``videointelligence_v1.VideoContext``]), ('gs://some-other-video/sample.mp4', Union[dict, - ``videointelligence_v1.types.VideoContext``]),] + ``videointelligence_v1.VideoContext``]),] context_side_input = ( @@ -113,11 +113,11 @@ def expand(self, pvalue): @typehints.with_input_types( - Union[str, bytes], Optional[videointelligence.types.VideoContext]) + Union[str, bytes], Optional[videointelligence.VideoContext]) class _VideoAnnotateFn(DoFn): """A DoFn that sends each input element to the GCP Video Intelligence API service and outputs an element with the return result of the API - (``google.cloud.videointelligence_v1.types.AnnotateVideoResponse``). + (``google.cloud.videointelligence_v1.AnnotateVideoResponse``). """ def __init__(self, features, location_id, metadata, timeout): super().__init__() @@ -166,7 +166,7 @@ class AnnotateVideoWithContext(AnnotateVideo): Element is a tuple of (Union[str, bytes], - Optional[videointelligence.types.VideoContext]) + Optional[videointelligence.VideoContext]) where the former is either an URI (e.g. a GCS URI) or bytes base64-encoded video data @@ -174,7 +174,7 @@ class AnnotateVideoWithContext(AnnotateVideo): def __init__(self, features, location_id=None, metadata=None, timeout=120): """ Args: - features: (List[``videointelligence_v1.enums.Feature``]) Required. + features: (List[``videointelligence_v1.Feature``]) Required. the Video Intelligence API features to detect location_id: (str) Optional. Cloud region where annotation should take place. 
@@ -202,12 +202,12 @@ def expand(self, pvalue): @typehints.with_input_types( - Tuple[Union[str, bytes], Optional[videointelligence.types.VideoContext]]) + Tuple[Union[str, bytes], Optional[videointelligence.VideoContext]]) class _VideoAnnotateFnWithContext(_VideoAnnotateFn): """A DoFn that unpacks each input tuple to element, video_context variables and sends these to the GCP Video Intelligence API service and outputs an element with the return result of the API - (``google.cloud.videointelligence_v1.types.AnnotateVideoResponse``). + (``google.cloud.videointelligence_v1.AnnotateVideoResponse``). """ def __init__(self, features, location_id, metadata, timeout): super().__init__( diff --git a/sdks/python/apache_beam/ml/gcp/videointelligenceml_test.py b/sdks/python/apache_beam/ml/gcp/videointelligenceml_test.py index 3215cebb4a88..79c841938cdb 100644 --- a/sdks/python/apache_beam/ml/gcp/videointelligenceml_test.py +++ b/sdks/python/apache_beam/ml/gcp/videointelligenceml_test.py @@ -47,11 +47,11 @@ def setUp(self): self.m2 = mock.Mock() self.m2.result.return_value = None self._mock_client.annotate_video.return_value = self.m2 - self.features = [videointelligence.enums.Feature.LABEL_DETECTION] + self.features = [videointelligence.Feature.LABEL_DETECTION] self.location_id = 'us-west1' - config = videointelligence.types.SpeechTranscriptionConfig( + config = videointelligence.SpeechTranscriptionConfig( language_code='en-US', enable_automatic_punctuation=True) - self.video_ctx = videointelligence.types.VideoContext( + self.video_ctx = videointelligence.VideoContext( speech_transcription_config=config) def test_AnnotateVideo_with_side_input_context(self): From 073788d3fc76023b36d9640baf17a637e9e6cb4e Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sun, 19 Feb 2023 14:35:57 -0500 Subject: [PATCH 15/64] Update lower bounds on some gcp deps --- sdks/python/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/setup.py 
b/sdks/python/setup.py index 857a96f8a91e..bd68f29646f7 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -302,8 +302,8 @@ def get_portability_package_data(): 'google-cloud-spanner>=3.0.0,<4', # GCP Packages required by ML functionality 'google-cloud-dlp>=3.0.0,<4.0', - 'google-cloud-language>=1.3.0,<3.0', - 'google-cloud-videointelligence>=1.8.0,<3.0', + 'google-cloud-language>=2.0,<3.0', + 'google-cloud-videointelligence>=2.0,<3.0', 'google-cloud-vision>=2,<4', 'google-cloud-recommendations-ai>=0.1.0,<1.0' ], From 30b9b7f0fe202fa9d9ea9b5aa48296e07f209cdd Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sun, 19 Feb 2023 15:37:04 -0500 Subject: [PATCH 16/64] Update deps --- .../py310/base_image_requirements.txt | 44 ++++++++----------- .../py37/base_image_requirements.txt | 23 ++++------ .../py38/base_image_requirements.txt | 22 ++++------ .../py39/base_image_requirements.txt | 22 ++++------ 4 files changed, 44 insertions(+), 67 deletions(-) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index cf67752e5f7c..430a26a725c6 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -43,38 +43,34 @@ fastavro==1.7.1 fasteners==0.18 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 -google-api-core==2.8.2 -google-api-python-client==2.78.0 +google-api-core==2.11.0 google-apitools==0.5.31 -google-auth==2.16.0 +google-auth==2.16.1 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 -google-cloud-bigquery==3.3.3 -google-cloud-bigquery-storage==2.16.0 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery==3.5.0 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 -google-cloud-datastore==1.15.5 -google-cloud-dlp==3.9.0 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 -google-cloud-pubsub==2.13.7 -google-cloud-pubsublite==1.6.0 
-google-cloud-recommendations-ai==0.7.1 -google-cloud-spanner==3.22.0 -google-cloud-videointelligence==1.16.3 -google-cloud-vision==3.1.2 +google-cloud-datastore==2.13.2 +google-cloud-dlp==3.11.1 +google-cloud-language==2.9.0 +google-cloud-pubsub==2.14.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 +google-cloud-spanner==3.27.1 +google-cloud-videointelligence==2.10.1 +google-cloud-vision==3.3.1 google-crc32c==1.5.0 google-resumable-media==2.4.1 -googleapis-common-protos==1.56.4 +googleapis-common-protos==1.58.0 greenlet==2.0.2 -grpc-google-iam-v1==0.12.4 +grpc-google-iam-v1==0.12.6 grpcio==1.51.1 grpcio-status==1.51.1 guppy3==3.1.2 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.68.1 +hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 joblib==1.2.0 @@ -85,16 +81,15 @@ nose==1.3.7 numpy==1.24.2 oauth2client==4.1.3 objsize==0.6.1 -opt-einsum==3.3.0 orjson==3.8.6 overrides==6.5.0 -packaging==21.3 +packaging==23.0 pandas==1.5.3 parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==3.19.4 +protobuf==4.22.0 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -133,6 +128,5 @@ tqdm==4.64.1 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 -Werkzeug==2.2.3 wrapt==1.14.1 zstandard==0.19.0 diff --git a/sdks/python/container/py37/base_image_requirements.txt b/sdks/python/container/py37/base_image_requirements.txt index 5c053d0eeb19..c89a571ed28f 100644 --- a/sdks/python/container/py37/base_image_requirements.txt +++ b/sdks/python/container/py37/base_image_requirements.txt @@ -44,24 +44,21 @@ fasteners==0.18 freezegun==1.2.2 future==0.18.3 google-api-core==2.11.0 -google-api-python-client==2.78.0 google-apitools==0.5.31 -google-auth==2.16.0 +google-auth==2.16.1 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 
google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 +google-cloud-language==2.9.0 google-cloud-pubsub==2.14.1 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 google-cloud-spanner==3.27.1 -google-cloud-videointelligence==1.16.3 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 google-resumable-media==2.4.1 @@ -73,7 +70,7 @@ grpcio-status==1.51.1 guppy3==3.1.2 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.68.1 +hypothesis==6.68.2 idna==3.4 importlib-metadata==6.0.0 iniconfig==2.0.0 @@ -85,7 +82,6 @@ nose==1.3.7 numpy==1.21.6 oauth2client==4.1.3 objsize==0.6.1 -opt-einsum==3.3.0 orjson==3.8.6 overrides==6.5.0 packaging==23.0 @@ -133,7 +129,6 @@ tqdm==4.64.1 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 -Werkzeug==2.2.3 wrapt==1.14.1 -zipp==3.13.0 +zipp==3.14.0 zstandard==0.19.0 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index 78051ccbedbe..31a95bc4e974 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -44,24 +44,21 @@ fasteners==0.18 freezegun==1.2.2 future==0.18.3 google-api-core==2.11.0 -google-api-python-client==2.78.0 google-apitools==0.5.31 -google-auth==2.16.0 +google-auth==2.16.1 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 +google-cloud-language==2.9.0 google-cloud-pubsub==2.14.1 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 
+google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 google-cloud-spanner==3.27.1 -google-cloud-videointelligence==1.16.3 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 google-resumable-media==2.4.1 @@ -73,7 +70,7 @@ grpcio-status==1.51.1 guppy3==3.1.2 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.68.1 +hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 joblib==1.2.0 @@ -84,7 +81,6 @@ nose==1.3.7 numpy==1.24.2 oauth2client==4.1.3 objsize==0.6.1 -opt-einsum==3.3.0 orjson==3.8.6 overrides==6.5.0 packaging==23.0 @@ -132,7 +128,5 @@ tqdm==4.64.1 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 -Werkzeug==2.2.3 wrapt==1.14.1 -zipp==3.13.0 zstandard==0.19.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index e147151fe944..2bf4c9e6ea0f 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -44,24 +44,21 @@ fasteners==0.18 freezegun==1.2.2 future==0.18.3 google-api-core==2.11.0 -google-api-python-client==2.78.0 google-apitools==0.5.31 -google-auth==2.16.0 +google-auth==2.16.1 google-auth-httplib2==0.1.0 -google-auth-oauthlib==0.4.6 google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.16.2 -google-cloud-bigtable==1.7.3 +google-cloud-bigquery-storage==2.18.1 +google-cloud-bigtable==2.15.0 google-cloud-core==2.3.2 google-cloud-datastore==2.13.2 google-cloud-dlp==3.11.1 -google-cloud-language==1.3.2 -google-cloud-profiler==3.1.0 +google-cloud-language==2.9.0 google-cloud-pubsub==2.14.1 -google-cloud-pubsublite==1.6.0 -google-cloud-recommendations-ai==0.7.1 +google-cloud-pubsublite==1.7.0 +google-cloud-recommendations-ai==0.10.1 google-cloud-spanner==3.27.1 -google-cloud-videointelligence==1.16.3 +google-cloud-videointelligence==2.10.1 google-cloud-vision==3.3.1 google-crc32c==1.5.0 google-resumable-media==2.4.1 @@ -73,7 +70,7 @@ 
grpcio-status==1.51.1 guppy3==3.1.2 hdfs==2.7.0 httplib2==0.21.0 -hypothesis==6.68.1 +hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 joblib==1.2.0 @@ -84,7 +81,6 @@ nose==1.3.7 numpy==1.24.2 oauth2client==4.1.3 objsize==0.6.1 -opt-einsum==3.3.0 orjson==3.8.6 overrides==6.5.0 packaging==23.0 @@ -132,7 +128,5 @@ tqdm==4.64.1 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 -Werkzeug==2.2.3 wrapt==1.14.1 -zipp==3.13.0 zstandard==0.19.0 From 882e5afc2b81c9efc047df7ca52c6c2db5c3cd83 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Tue, 21 Feb 2023 09:28:01 -0500 Subject: [PATCH 17/64] Debug --- sdks/python/gen_protos.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 06efe2a1ec65..0cea10f8ab78 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -38,7 +38,7 @@ LOG = logging.getLogger() LOG.setLevel(logging.INFO) -os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python' +os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'upb' LICENSE_HEADER = """ # @@ -203,11 +203,11 @@ def message_repr(self, msg): else: return self.empty_type(type(msg)) - def write_enum(self, enum_name, enum, indent): + def write_enum(self, enum_name, descriptor, indent): ctx = Context(indent=indent) with ctx.indent(): - for v in enum.DESCRIPTOR.values: + for enum_name, v in descriptor.enum_types_by_name.items(): extensions = v.GetOptions().Extensions prop = ( @@ -233,10 +233,12 @@ def write_message(self, message_name, message, indent=0): ctx = Context(indent=indent) with ctx.indent(): + if 'MonitoringInfoSpecs' in str(message): + pass for obj_name, obj in inspect.getmembers(message): if self.is_message_type(obj): ctx.lines += self.write_message(obj_name, obj, ctx._indent) - elif self.is_enum_type(obj): + elif obj_name == 'DESCRIPTOR': ctx.lines += self.write_enum(obj_name, obj, ctx._indent) if ctx.lines: From bf2846c8a09d623208ff97218788b38d9c8e0685 Mon Sep 17 
00:00:00 2001 From: Anand Inguva Date: Tue, 21 Feb 2023 10:45:32 -0500 Subject: [PATCH 18/64] Update gen_protos.py to support google._upb --- sdks/python/gen_protos.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 0cea10f8ab78..734a39e780e4 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -38,7 +38,6 @@ LOG = logging.getLogger() LOG.setLevel(logging.INFO) -os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'upb' LICENSE_HEADER = """ # @@ -125,6 +124,7 @@ def generate_urn_files(out_dir, api_path): This is executed at build time rather than dynamically on import to ensure that it is compatible with static type checkers like mypy. """ + from google._upb import _message from google.protobuf import message class Context(object): @@ -180,8 +180,8 @@ def python_repr(self, obj): obj, ( list, - containers.RepeatedScalarFieldContainer, - containers.RepeatedCompositeFieldContainer + _message.RepeatedScalarContainer, + _message.RepeatedCompositeContainer, )): # pylint: disable=c-extension-no-member return '[%s]' % ', '.join(self.python_repr(x) for x in obj) else: @@ -203,13 +203,12 @@ def message_repr(self, msg): else: return self.empty_type(type(msg)) - def write_enum(self, enum_name, descriptor, indent): + def write_enum(self, enum_name, enum, indent): ctx = Context(indent=indent) - with ctx.indent(): - for enum_name, v in descriptor.enum_types_by_name.items(): - extensions = v.GetOptions().Extensions - + for enum_value_name in enum.values_by_name: + enum_value_descriptor = enum.values_by_name[enum_value_name] + extensions = enum_value_descriptor.GetOptions().Extensions prop = ( extensions[beam_runner_api_pb2.beam_urn], extensions[beam_runner_api_pb2.beam_constant], @@ -221,7 +220,7 @@ def write_enum(self, enum_name, descriptor, indent): continue ctx.line( '%s = PropertiesFromEnumValue(%s)' % - (v.name, ', '.join(self.python_repr(x) for x
in prop))) + (enum_value_name, ', '.join(self.python_repr(x) for x in prop))) if ctx.lines: ctx.prepend('class %s(object):' % enum_name) @@ -233,13 +232,11 @@ def write_message(self, message_name, message, indent=0): ctx = Context(indent=indent) with ctx.indent(): - if 'MonitoringInfoSpecs' in str(message): - pass for obj_name, obj in inspect.getmembers(message): - if self.is_message_type(obj): - ctx.lines += self.write_message(obj_name, obj, ctx._indent) - elif obj_name == 'DESCRIPTOR': - ctx.lines += self.write_enum(obj_name, obj, ctx._indent) + if obj_name == 'DESCRIPTOR': + for enum_name in obj.enum_types_by_name: + enum = obj.enum_types_by_name[enum_name] + ctx.lines += self.write_enum(enum_name, enum, ctx._indent) if ctx.lines: ctx.prepend('class %s(object):' % message_name) From 3a0c1b76f093d12b951c1ad4ea5ccc2a26ce49c0 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Tue, 21 Feb 2023 11:25:26 -0500 Subject: [PATCH 19/64] Change checking condition to cpp to upb --- sdks/python/container/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index fe5238d22ce3..158b19714142 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -42,8 +42,8 @@ RUN \ pip install --no-deps -r /tmp/base_image_requirements.txt && \ python -c "import nltk; nltk.download('stopwords')" && \ rm /root/nltk_data/corpora/stopwords.zip && \ - # Check that the fast implementation of protobuf is used. - python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'cpp'; print ('Verified fast protobuf used.')" && \ + # Check that the protobuf upb(also called micro protobuf) is used. + python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \ # Remove pip cache. 
rm -rf /root/.cache/pip && \ rm -rf /tmp/base_image_requirements.txt From 2614d0eb7eb1a5790b5b30e79d59ce24b5c5586c Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Tue, 21 Feb 2023 16:19:04 -0500 Subject: [PATCH 20/64] Update google-cloud-datastore ByteSize --- .../io/gcp/datastore/v1new/datastoreio.py | 3 ++- .../gcp/datastore/v1new/datastoreio_test.py | 4 ++-- .../io/gcp/datastore/v1new/util.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py index be912ae33761..ab5b836ae5e3 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py @@ -477,7 +477,8 @@ def process(self, element): client_element = self.element_to_client_batch_item(element) self._batch_elements.append(client_element) self.add_to_batch(client_element) - self._batch_bytes_size += self._batch.mutations[-1].ByteSize() + self._batch_bytes_size += util.extract_byte_size( + self._batch.mutations[-1]) if (len(self._batch.mutations) >= self._target_batch_size or self._batch_bytes_size > util.WRITE_BATCH_MAX_BYTES_SIZE): diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py index 8a7977e475cb..40a0cc98026f 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py @@ -66,9 +66,9 @@ def __init__(self, entity=None, key=None): def ByteSize(self): if self.entity is not None: - return helpers.entity_to_protobuf(self.entity).ByteSize() + return util.extract_byte_size(helpers.entity_to_protobuf(self.entity)) else: - return self.key.to_protobuf().ByteSize() + return util.extract_byte_size(self.key.to_protobuf()) class FakeBatch(object): diff --git 
a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py index 06a22143f59d..cb2566aa95b4 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py @@ -137,3 +137,22 @@ def report_latency(self, now, latency_ms, num_mutations): num_mutations: int, number of mutations contained in the RPC. """ self._commit_time_per_entity_ms.add(now, latency_ms / num_mutations) + + +def extract_byte_size(proto_message): + """ + Gets the byte size from a google.protobuf or proto-plus message + + google-cloud-datastore moved from using protobuf to using + proto-plus messages. + protobuf object has attribute ByteSize() but proto.Message() objects + don't. Workaround: + https://github.com/googleapis/proto-plus-python/issues/163 + """ + if hasattr(proto_message, "ByteSize"): + # google.protobuf message + return proto_message.ByteSize() + if hasattr(type(proto_message), "pb"): + # proto-plus message + return type(proto_message).pb(proto_message).ByteSize() + return None From 188a5d6266173892479beeac0339007e0e31f772 Mon Sep 17 00:00:00 2001 From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com> Date: Tue, 21 Feb 2023 17:42:34 -0500 Subject: [PATCH 21/64] Update Dockerfile --- sdks/python/container/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 158b19714142..c9437504588e 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -43,7 +43,7 @@ RUN \ python -c "import nltk; nltk.download('stopwords')" && \ rm /root/nltk_data/corpora/stopwords.zip && \ # Check that the protobuf upb(also called micro protobuf) is used. 
- python -c "from google.protobuf.internal import api_implementation; assert api_implementation._default_implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \ + python -c "from google.protobuf.internal import api_implementation; assert api_implementation._implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \ # Remove pip cache. rm -rf /root/.cache/pip && \ rm -rf /tmp/base_image_requirements.txt From 2dba13fde362ec77d80e7bdc82a9ead623f7dfd1 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 08:57:47 -0500 Subject: [PATCH 22/64] raise Error --- sdks/python/apache_beam/io/gcp/datastore/v1new/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py index cb2566aa95b4..b9e55ffc87fa 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py @@ -155,4 +155,4 @@ def extract_byte_size(proto_message): if hasattr(type(proto_message), "pb"): # proto-plus message return type(proto_message).pb(proto_message).ByteSize() - return None + return NotImplementedError From b5ac38820f0b987428f7eb9de574cdf121b0d5b4 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 09:12:21 -0500 Subject: [PATCH 23/64] Update dependencies bound --- sdks/python/setup.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index bd68f29646f7..5b8504603734 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -242,7 +242,12 @@ def get_portability_package_data(): 'objsize>=0.6.1,<0.7.0', 'pymongo>=3.8.0,<4.0.0', 'proto-plus>=1.7.1,<2', - 'protobuf>=4.00,<=5', + # use a tighter upper bound in protobuf dependency + # to make sure the minor version at job submission + # does not exceed the minor version at runtime. 
+ # To avoid depending on an old dependency, update the minor version on + # every Beam release, see: https://github.com/apache/beam/issues/25590 + 'protobuf>=4.21.1,<4.23.0', 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', @@ -296,20 +301,20 @@ def get_portability_package_data(): 'google-cloud-pubsublite>=1.2.0,<2', # GCP packages required by tests 'google-cloud-bigquery>=1.6.0,<4', - 'google-cloud-bigquery-storage>=2.6.3,<2.19', + 'google-cloud-bigquery-storage>=2.6.3,<3', 'google-cloud-core>=0.28.1,<3', 'google-cloud-bigtable>=0.31.1,<3', 'google-cloud-spanner>=3.0.0,<4', # GCP Packages required by ML functionality - 'google-cloud-dlp>=3.0.0,<4.0', - 'google-cloud-language>=2.0,<3.0', - 'google-cloud-videointelligence>=2.0,<3.0', + 'google-cloud-dlp>=3.0.0,<4', + 'google-cloud-language>=2.0,<3', + 'google-cloud-videointelligence>=2.0,<3', 'google-cloud-vision>=2,<4', - 'google-cloud-recommendations-ai>=0.1.0,<1.0' + 'google-cloud-recommendations-ai>=0.1.0,<1' ], 'interactive': [ 'facets-overview>=1.1.0,<2', - 'google-cloud-dataproc>=3.0.0,<3.2.0', + 'google-cloud-dataproc>=5.0.0,<6', # IPython>=8 is not compatible with Python<=3.7 'ipython>=7,<8;python_version<="3.7"', 'ipython>=8,<9;python_version>"3.7"', From 35631f529b22a7585728d4071eb8daec7b34ea74 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 17:31:14 -0500 Subject: [PATCH 24/64] Add optional field --- .../model/pipeline/v1/beam_runner_api.proto | 32 +++++++++---------- .../beam/model/pipeline/v1/endpoints.proto | 4 +-- .../beam/model/pipeline/v1/schema.proto | 4 +-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto index 644c6d4bda52..7ba39294a279 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto +++ 
b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto @@ -151,7 +151,7 @@ message PTransform { // is a ParDoPayload, and so on. For some special composite transforms, // the payload is also officially defined. See StandardPTransforms for // details. - FunctionSpec spec = 1; + optional FunctionSpec spec = 1; // (Optional) A list of the ids of transforms that it contains. // @@ -529,12 +529,12 @@ message ParDoPayload { // (Optional) Only set when this ParDo contains a splittable DoFn. // If this is set, the corresponding standard requirement should also // be placed in the pipeline requirements. - string restriction_coder_id = 7; + optional string restriction_coder_id = 7; // (Optional) Only set when this ParDo can request bundle finalization. // If this is set, the corresponding standard requirement should also // be placed in the pipeline requirements. - bool requests_finalization = 8; + optional bool requests_finalization = 8; // Whether this stage requires time sorted input. // If this is set, the corresponding standard requirement should also @@ -662,7 +662,7 @@ message TestStreamPayload { // (Optional) If specified, points to a TestStreamService to be // used to retrieve events. - ApiServiceDescriptor endpoint = 3; + optional ApiServiceDescriptor endpoint = 3; message Event { oneof event { @@ -679,7 +679,7 @@ message TestStreamPayload { // (Optional) The output watermark tag for a PCollection. If unspecified // or with an empty string, this will default to the Main PCollection // Output - string tag = 2; + optional string tag = 2; } // Advances the processing time clock by the specified amount. @@ -696,7 +696,7 @@ message TestStreamPayload { // (Optional) The output PCollection tag to add these elements to. If // unspecified or with an empty string, this will default to the Main // PCollection Output. 
- string tag = 3; + optional string tag = 3; } } @@ -806,7 +806,7 @@ message GroupIntoBatchesPayload { int64 batch_size_bytes = 3; // (Optional) Max duration a batch is allowed to be cached in states. - int64 max_buffering_duration_millis = 2; + optional int64 max_buffering_duration_millis = 2; } // A coder, the binary format for serialization and deserialization of data in @@ -1156,7 +1156,7 @@ message WindowingStrategy { // (Optional) Environment where the current window_fn should be applied in. // Runner that executes the pipeline may choose to override this if needed. // If not specified, environment will be decided by the runner. - string environment_id = 11; + optional string environment_id = 11; } // Whether or not a PCollection's WindowFn is non-merging, merging, or @@ -1297,10 +1297,10 @@ message Trigger { message AfterEndOfWindow { // (Optional) A trigger governing output prior to the end of the window. - Trigger early_firings = 1; + optional Trigger early_firings = 1; // (Optional) A trigger governing output after the end of the window. - Trigger late_firings = 2; + optional Trigger late_firings = 2; } // After input arrives, ready when the specified delay has passed. @@ -1481,7 +1481,7 @@ message ArtifactUrlPayload { string url = 1; // (Optional) The hex-encoded sha256 checksum of the artifact if available. - string sha256 = 2; + optional string sha256 = 2; } message EmbeddedFilePayload { @@ -1503,7 +1503,7 @@ message MavenPayload { string artifact = 1; // (Optional) Repository URL. If not specified, Maven central is used by default. - string repository_url = 2; + optional string repository_url = 2; } message DeferredArtifactPayload { @@ -1765,7 +1765,7 @@ message FunctionSpec { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - bytes payload = 3; + optional bytes payload = 3; } // A set of well known URNs describing display data. 
@@ -1829,7 +1829,7 @@ message DisplayData { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - bytes payload = 2; + optional bytes payload = 2; } @@ -1846,7 +1846,7 @@ message MessageWithComponents { // // If this is absent, it is expected that there are no // references. - Components components = 1; + optional Components components = 1; // (Required) The root message that may contain pointers // that should be resolved by looking inside components. @@ -1960,7 +1960,7 @@ message ExecutableStagePayload { // value using the beam:coder:windowed_value:v1 coder parameterized by // a beam:coder:bytes:v1 element coder and the window coder that this // param_windowed_value coder uses. - bytes payload = 2; + optional bytes payload = 2; // (Required) The target(PCollection or Timer) this setting applies to. oneof target { diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto index 46fb3d5d3b1c..e84a8b0d929f 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto @@ -36,7 +36,7 @@ message ApiServiceDescriptor { // (Optional) The method for authentication. If unspecified, access to the // url is already being performed in a trusted context (e.g. localhost, // private network). - AuthenticationSpec authentication = 2; + optional AuthenticationSpec authentication = 2; } message AuthenticationSpec { @@ -48,7 +48,7 @@ message AuthenticationSpec { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - bytes payload = 2; + optional bytes payload = 2; } // TODO: Add authentication specifications as needed. 
diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto index 6e05aada21f9..411c28fe7ff9 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto @@ -47,7 +47,7 @@ message Field { // REQUIRED. Name of this field within the schema. string name = 1; // OPTIONAL. Human readable description of this field, such as the query that generated it. - string description = 2; + optional string description = 2; FieldType type = 3; int32 id = 4; @@ -58,7 +58,7 @@ message Field { // If no fields have encoding position populated the order of encoding is the same as the order in the Schema. // If this Field is part of a Schema where encoding_positions_set is True then encoding_position must be // defined, otherwise this field is ignored. - int32 encoding_position = 5; + optional int32 encoding_position = 5; repeated Option options = 6; } From 1d93010d0904a737b8bb45d0e311830f41000e15 Mon Sep 17 00:00:00 2001 From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com> Date: Wed, 22 Feb 2023 17:38:25 -0500 Subject: [PATCH 25/64] Add license headers --- .../coders/proto2_coder_test_messages_pb2.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py index 88a6f15dd8c9..231625a12d22 100644 --- a/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py +++ b/sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: apache_beam/coders/proto2_coder_test_messages.proto From 3801c162fe4a0c155a8e1d9df2111466a2da66a2 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 18:09:30 -0500 Subject: [PATCH 26/64] Add more optional --- .../model/fn_execution/v1/beam_fn_api.proto | 2 +- .../model/pipeline/v1/beam_runner_api.proto | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto index 975d18cbdfb7..dca8ce5620f6 100644 --- a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto +++ b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto @@ -946,7 +946,7 @@ message StateGetRequest { // // If unspecified, signals to the runner that the response should start // from the beginning of the logical continuable stream. 
- bytes continuation_token = 1; + optional bytes continuation_token = 1; } // A response to get state representing a logical byte stream which can be diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto index 7ba39294a279..fc856ed7d6e2 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto @@ -191,11 +191,11 @@ message PTransform { // there is none, it may be omitted. repeated DisplayData display_data = 6; - // Environment where the current PTransform should be executed in. + // (Optional) Environment where the current PTransform should be executed in. // // Transforms that are required to be implemented by a runner must omit this. // All other transforms are required to specify this. - string environment_id = 7; + optional string environment_id = 7; // (Optional) A map from URNs designating a type of annotation, to the // annotation in binary format. For example, an annotation could indicate @@ -749,19 +749,19 @@ message WriteFilesPayload { // proto if with_attributes == true. Otherwise, the bytes is the raw payload. message PubSubReadPayload { - // Topic to read from. Exactly one of topic or subscription should be set. + // (Optional) Topic to read from. Exactly one of topic or subscription should be set. // Topic format is: /topics/project_id/subscription_name - string topic = 1; + optional string topic = 1; - // Subscription to read from. Exactly one of topic or subscription should be set. + // (Optional) Subscription to read from. Exactly one of topic or subscription should be set. // Subscription format is: /subscriptions/project_id/subscription_name - string subscription = 2; + optional string subscription = 2; - // Attribute that provides element timestamps. 
- string timestamp_attribute = 3; + // (Optional) Attribute that provides element timestamps. + optional string timestamp_attribute = 3; - // Attribute to be used for uniquely identifying messages. - string id_attribute = 4; + // (Optional) Attribute to be used for uniquely identifying messages. + optional string id_attribute = 4; // If true, reads Pub/Sub payload as well as attributes. If false, reads only the payload. bool with_attributes = 5; @@ -786,11 +786,11 @@ message PubSubWritePayload { // Topic format is: /topics/project_id/subscription_name string topic = 1; - // Attribute that provides element timestamps. - string timestamp_attribute = 2; + // (Optional) Attribute that provides element timestamps. + optional string timestamp_attribute = 2; - // Attribute that uniquely identify messages. - string id_attribute = 3; + // (Optional) Attribute that uniquely identify messages. + optional string id_attribute = 3; // If set, the topic is expected to be provided during runtime. string topic_runtime_overridden = 4; @@ -1540,7 +1540,7 @@ message Environment { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - bytes payload = 3; + optional bytes payload = 3; // (Optional) Static display data for the environment. If there is none, // it may be omitted. 
From 8d255638d1a2c4f50306b81aa0a0825a91df14a7 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 18:10:41 -0500 Subject: [PATCH 27/64] Changes types to make them compatible with mypy-protobuf generated stubs --- .../apache_beam/runners/portability/abstract_job_service.py | 2 +- .../runners/portability/fn_api_runner/execution.py | 2 +- sdks/python/apache_beam/runners/worker/log_handler.py | 2 +- sdks/python/apache_beam/transforms/core.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/abstract_job_service.py b/sdks/python/apache_beam/runners/portability/abstract_job_service.py index a369af94e7dd..1aa841df4c31 100644 --- a/sdks/python/apache_beam/runners/portability/abstract_job_service.py +++ b/sdks/python/apache_beam/runners/portability/abstract_job_service.py @@ -194,7 +194,7 @@ class AbstractBeamJob(object): def __init__(self, job_id, # type: str - job_name, # type: Optional[str] + job_name, # type: str pipeline, # type: beam_runner_api_pb2.Pipeline options # type: struct_pb2.Struct ): diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py index 400cf27f3444..fe1f2cc42153 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py @@ -500,7 +500,7 @@ def __init__(self, execution_context, windowing_strategy_proto): self._counter = 0 # Lazily created in make_process_bundle_descriptor() self._process_bundle_descriptor = None - self._bundle_processor_id = None # type: Optional[str] + self._bundle_processor_id = '' # type: str self.windowed_input_coder_impl = None # type: Optional[CoderImpl] self.windowed_output_coder_impl = None # type: Optional[CoderImpl] diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 
5731a4be05b0..16aef3b0707a 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -112,7 +112,7 @@ def connect(self): return self._logging_stub.Logging(self._write_log_entries()) def map_log_level(self, level): - # type: (int) -> beam_fn_api_pb2.LogEntry.Severity.Enum + # type: (int) -> beam_fn_api_pb2.LogEntry.Severity.Enum.ValueType try: return LOG_LEVEL_TO_LOGENTRY_MAP[level] except KeyError: diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index cdc96b52b378..47aaeff43a6f 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -3164,8 +3164,8 @@ class Windowing(object): def __init__(self, windowfn, # type: WindowFn triggerfn=None, # type: typing.Optional[TriggerFn] - accumulation_mode=None, # type: typing.Optional[beam_runner_api_pb2.AccumulationMode.Enum] - timestamp_combiner=None, # type: typing.Optional[beam_runner_api_pb2.OutputTime.Enum] + accumulation_mode=None, # type: typing.Optional[beam_runner_api_pb2.AccumulationMode.Enum.ValueType] + timestamp_combiner=None, # type: typing.Optional[beam_runner_api_pb2.OutputTime.Enum.ValueType] allowed_lateness=0, # type: typing.Union[int, float] environment_id=None, # type: typing.Optional[str] ): From de15ba29d231ece7d3e371f016ab5d08d24c7fea Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 21:22:59 -0500 Subject: [PATCH 28/64] Add license for grpcio-status --- sdks/python/container/license_scripts/dep_urls_py.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdks/python/container/license_scripts/dep_urls_py.yaml b/sdks/python/container/license_scripts/dep_urls_py.yaml index 9aa1111e85db..a0b1cd959735 100644 --- a/sdks/python/container/license_scripts/dep_urls_py.yaml +++ b/sdks/python/container/license_scripts/dep_urls_py.yaml @@ -81,6 +81,9 @@ pip_dependencies: notice: 
"https://raw.githubusercontent.com/grpc/grpc/master/NOTICE.txt" grpcio-gcp: license: "https://raw.githubusercontent.com/GoogleCloudPlatform/grpc-gcp-python/master/LICENSE" + grpcio-status: + license: "https://raw.githubusercontent.com/grpc/grpc/master/LICENSE" + notice: "https://raw.githubusercontent.com/grpc/grpc/master/NOTICE.txt" guppy: license: "https://raw.githubusercontent.com/joshwcomeau/guppy/master/LICENSE.md" guppy3: From 853fc864171662600ba8fc19942caad8308f445e Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 20:14:38 +0000 Subject: [PATCH 29/64] update mypy to 0.790 --- sdks/python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index f3e55b1d99a9..f77c15fb8803 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -132,7 +132,7 @@ commands = [testenv:py37-mypy] deps = -r build-requirements.txt - mypy==0.782 + mypy==0.790 dask==2022.01.0 distributed==2022.01.0 # make extras available in case any of these libs are typed From 1ebc81517c7361acf83762e1cb3a48ab25d7d8ea Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 20:15:19 +0000 Subject: [PATCH 30/64] add more optional fields to satisfy mypy type checker --- .../org/apache/beam/model/fn_execution/v1/beam_fn_api.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto index dca8ce5620f6..1db1787ab37d 100644 --- a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto +++ b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto @@ -955,7 +955,7 @@ message StateGetResponse { // (Optional) If specified, represents a token which can be used with the // state API to get the next chunk of this logical byte stream. 
The end of // the logical byte stream is signalled by this field being unset. - bytes continuation_token = 1; + optional bytes continuation_token = 1; // Represents a part of a logical byte stream. Elements within // the logical byte stream are encoded in the nested context and From 7bfece4377542c0324bdb4b2836a13ccc41b356f Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 20:15:46 +0000 Subject: [PATCH 31/64] Identify and solve the mypy type checks --- sdks/python/apache_beam/ml/inference/pytorch_inference.py | 2 +- sdks/python/apache_beam/runners/pipeline_context.py | 6 +++++- .../runners/portability/fn_api_runner/execution.py | 2 +- .../runners/portability/fn_api_runner/fn_runner.py | 2 +- .../runners/portability/fn_api_runner/translations.py | 2 +- .../runners/portability/fn_api_runner/worker_handlers.py | 2 +- .../apache_beam/runners/portability/local_job_service.py | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/ml/inference/pytorch_inference.py b/sdks/python/apache_beam/ml/inference/pytorch_inference.py index 71a4ccc63a27..818dd8325dcc 100644 --- a/sdks/python/apache_beam/ml/inference/pytorch_inference.py +++ b/sdks/python/apache_beam/ml/inference/pytorch_inference.py @@ -97,7 +97,7 @@ def _load_model( "Loading state_dict_path %s onto a %s device", state_dict_path, device) if not torch_script_model_path: file = FileSystems.open(state_dict_path, 'rb') - model = model_class(**model_params) # type: ignore[misc] + model = model_class(**model_params) # type: ignore[arg-type,misc] state_dict = torch.load(file, map_location=device) model.load_state_dict(state_dict) else: diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index a4966e64559d..c9f4604984b8 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -310,7 +310,11 @@ def 
get_or_create_environment_with_resource_hints( """Creates an environment that has necessary hints and returns its id.""" template_env = self.environments.get_proto_from_id(template_env_id) cloned_env = beam_runner_api_pb2.Environment() - cloned_env.CopyFrom(template_env) + # Remove the suppress warning for type once mypy is updated to + # newer version. https://github.com/apache/beam/issues/25615 + # error: Argument 1 to "CopyFrom" of "Message" has incompatible type "Message"; + # expected "Environment" [arg-type] + cloned_env.CopyFrom(template_env) # type: ignore[arg-type] cloned_env.resource_hints.clear() cloned_env.resource_hints.update(resource_hints) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py index fe1f2cc42153..0a59b5311137 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py @@ -677,7 +677,7 @@ def make_coder(urn, *components): windowing_strategy_id=global_windowing_strategy_id, coder_id=output_coder_id), }, - coders=coders, # type: ignore + coders=coders, windowing_strategies={ global_windowing_strategy_id: global_windowing_strategy_proto, }, diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py index 052029e1cf7b..8d957068d08b 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py @@ -1105,7 +1105,7 @@ class ExtendedProvisionInfo(object): def __init__(self, provision_info=None, # type: Optional[beam_provision_api_pb2.ProvisionInfo] artifact_staging_dir=None, # type: Optional[str] - job_name=None, # type: Optional[str] + job_name='', # type: str ): # type: (...) 
-> None self.provision_info = ( diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 0a289056a6a8..154c8efdd21f 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -256,7 +256,7 @@ def has_as_main_input(self, pcoll): transform.spec.payload, beam_runner_api_pb2.ParDoPayload) local_side_inputs = payload.side_inputs else: - local_side_inputs = {} + local_side_inputs = {} # type: ignore[assignment] for local_id, pipeline_id in transform.inputs.items(): if pcoll == pipeline_id and local_id not in local_side_inputs: return True diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py index abb356d5ff4e..b11c8349909c 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py @@ -428,7 +428,7 @@ def GetProvisionInfo(self, request, context=None): worker_id = dict(context.invocation_metadata())['worker_id'] worker = self._worker_manager.get_worker(worker_id) info = copy.copy(worker.provision_info.provision_info) - info.logging_endpoint.CopyFrom(worker.logging_api_service_descriptor()) # type: ignore + info.logging_endpoint.CopyFrom(worker.logging_api_service_descriptor()) info.artifact_endpoint.CopyFrom(worker.artifact_api_service_descriptor()) info.control_endpoint.CopyFrom(worker.control_api_service_descriptor()) else: diff --git a/sdks/python/apache_beam/runners/portability/local_job_service.py b/sdks/python/apache_beam/runners/portability/local_job_service.py index 7f1908a7e7e1..91ddb3fced15 100644 --- a/sdks/python/apache_beam/runners/portability/local_job_service.py +++ 
b/sdks/python/apache_beam/runners/portability/local_job_service.py @@ -239,7 +239,7 @@ class BeamJob(abstract_job_service.AbstractBeamJob): """ def __init__(self, - job_id, # type: str + job_id, # type: str pipeline, options, provision_info, # type: fn_runner.ExtendedProvisionInfo From 7552e3df57998587cd3a5344534beea7f556e7df Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 20:23:38 +0000 Subject: [PATCH 32/64] fix lint --- sdks/python/apache_beam/runners/pipeline_context.py | 6 ++++-- sdks/python/gen_protos.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index c9f4604984b8..aa515d41ca3b 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -312,8 +312,10 @@ def get_or_create_environment_with_resource_hints( cloned_env = beam_runner_api_pb2.Environment() # Remove the suppress warning for type once mypy is updated to # newer version. https://github.com/apache/beam/issues/25615 - # error: Argument 1 to "CopyFrom" of "Message" has incompatible type "Message"; - # expected "Environment" [arg-type] + # error: Argument 1 to "CopyFrom" of "Message" has incompatible type + # "Message"; expected "Environment" [arg-type] + # Here, Environment is a subclass of Message but mypy still + # throws an error. 
cloned_env.CopyFrom(template_env) # type: ignore[arg-type] cloned_env.resource_hints.clear() cloned_env.resource_hints.update(resource_hints) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index 734a39e780e4..e69ec32f731f 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -33,7 +33,6 @@ from collections import defaultdict from importlib import import_module -from google.protobuf.internal import containers import pkg_resources LOG = logging.getLogger() From 01d789846394f29a2476c1debf0f4042cec2280a Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 20:15:19 +0000 Subject: [PATCH 33/64] Revert "add more optional fields to satisfy mypy type checker" This reverts commit 1ebc81517c7361acf83762e1cb3a48ab25d7d8ea. --- .../org/apache/beam/model/fn_execution/v1/beam_fn_api.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto index 1db1787ab37d..dca8ce5620f6 100644 --- a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto +++ b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto @@ -955,7 +955,7 @@ message StateGetResponse { // (Optional) If specified, represents a token which can be used with the // state API to get the next chunk of this logical byte stream. The end of // the logical byte stream is signalled by this field being unset. - optional bytes continuation_token = 1; + bytes continuation_token = 1; // Represents a part of a logical byte stream. 
Elements within // the logical byte stream are encoded in the nested context and From 15f47d86460e7cdc8a9a8b96bd863ee567cff31d Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 18:09:30 -0500 Subject: [PATCH 34/64] Revert "Add more optional" This reverts commit 3801c162fe4a0c155a8e1d9df2111466a2da66a2. --- .../model/fn_execution/v1/beam_fn_api.proto | 2 +- .../model/pipeline/v1/beam_runner_api.proto | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto index dca8ce5620f6..975d18cbdfb7 100644 --- a/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto +++ b/model/fn-execution/src/main/proto/org/apache/beam/model/fn_execution/v1/beam_fn_api.proto @@ -946,7 +946,7 @@ message StateGetRequest { // // If unspecified, signals to the runner that the response should start // from the beginning of the logical continuable stream. - optional bytes continuation_token = 1; + bytes continuation_token = 1; } // A response to get state representing a logical byte stream which can be diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto index fc856ed7d6e2..7ba39294a279 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto @@ -191,11 +191,11 @@ message PTransform { // there is none, it may be omitted. repeated DisplayData display_data = 6; - // (Optional) Environment where the current PTransform should be executed in. + // Environment where the current PTransform should be executed in. 
// // Transforms that are required to be implemented by a runner must omit this. // All other transforms are required to specify this. - optional string environment_id = 7; + string environment_id = 7; // (Optional) A map from URNs designating a type of annotation, to the // annotation in binary format. For example, an annotation could indicate @@ -749,19 +749,19 @@ message WriteFilesPayload { // proto if with_attributes == true. Otherwise, the bytes is the raw payload. message PubSubReadPayload { - // (Optional) Topic to read from. Exactly one of topic or subscription should be set. + // Topic to read from. Exactly one of topic or subscription should be set. // Topic format is: /topics/project_id/subscription_name - optional string topic = 1; + string topic = 1; - // (Optional) Subscription to read from. Exactly one of topic or subscription should be set. + // Subscription to read from. Exactly one of topic or subscription should be set. // Subscription format is: /subscriptions/project_id/subscription_name - optional string subscription = 2; + string subscription = 2; - // (Optional) Attribute that provides element timestamps. - optional string timestamp_attribute = 3; + // Attribute that provides element timestamps. + string timestamp_attribute = 3; - // (Optional) Attribute to be used for uniquely identifying messages. - optional string id_attribute = 4; + // Attribute to be used for uniquely identifying messages. + string id_attribute = 4; // If true, reads Pub/Sub payload as well as attributes. If false, reads only the payload. bool with_attributes = 5; @@ -786,11 +786,11 @@ message PubSubWritePayload { // Topic format is: /topics/project_id/subscription_name string topic = 1; - // (Optional) Attribute that provides element timestamps. - optional string timestamp_attribute = 2; + // Attribute that provides element timestamps. + string timestamp_attribute = 2; - // (Optional) Attribute that uniquely identify messages. 
- optional string id_attribute = 3; + // Attribute that uniquely identify messages. + string id_attribute = 3; // If set, the topic is expected to be provided during runtime. string topic_runtime_overridden = 4; @@ -1540,7 +1540,7 @@ message Environment { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - optional bytes payload = 3; + bytes payload = 3; // (Optional) Static display data for the environment. If there is none, // it may be omitted. From cb230204762f55d0720ceb78fc8985147e0ece42 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 22 Feb 2023 17:31:14 -0500 Subject: [PATCH 35/64] Revert "Add optional field" This reverts commit 35631f529b22a7585728d4071eb8daec7b34ea74. --- .../model/pipeline/v1/beam_runner_api.proto | 32 +++++++++---------- .../beam/model/pipeline/v1/endpoints.proto | 4 +-- .../beam/model/pipeline/v1/schema.proto | 4 +-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto index 7ba39294a279..644c6d4bda52 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/beam_runner_api.proto @@ -151,7 +151,7 @@ message PTransform { // is a ParDoPayload, and so on. For some special composite transforms, // the payload is also officially defined. See StandardPTransforms for // details. - optional FunctionSpec spec = 1; + FunctionSpec spec = 1; // (Optional) A list of the ids of transforms that it contains. // @@ -529,12 +529,12 @@ message ParDoPayload { // (Optional) Only set when this ParDo contains a splittable DoFn. // If this is set, the corresponding standard requirement should also // be placed in the pipeline requirements. 
- optional string restriction_coder_id = 7; + string restriction_coder_id = 7; // (Optional) Only set when this ParDo can request bundle finalization. // If this is set, the corresponding standard requirement should also // be placed in the pipeline requirements. - optional bool requests_finalization = 8; + bool requests_finalization = 8; // Whether this stage requires time sorted input. // If this is set, the corresponding standard requirement should also @@ -662,7 +662,7 @@ message TestStreamPayload { // (Optional) If specified, points to a TestStreamService to be // used to retrieve events. - optional ApiServiceDescriptor endpoint = 3; + ApiServiceDescriptor endpoint = 3; message Event { oneof event { @@ -679,7 +679,7 @@ message TestStreamPayload { // (Optional) The output watermark tag for a PCollection. If unspecified // or with an empty string, this will default to the Main PCollection // Output - optional string tag = 2; + string tag = 2; } // Advances the processing time clock by the specified amount. @@ -696,7 +696,7 @@ message TestStreamPayload { // (Optional) The output PCollection tag to add these elements to. If // unspecified or with an empty string, this will default to the Main // PCollection Output. - optional string tag = 3; + string tag = 3; } } @@ -806,7 +806,7 @@ message GroupIntoBatchesPayload { int64 batch_size_bytes = 3; // (Optional) Max duration a batch is allowed to be cached in states. - optional int64 max_buffering_duration_millis = 2; + int64 max_buffering_duration_millis = 2; } // A coder, the binary format for serialization and deserialization of data in @@ -1156,7 +1156,7 @@ message WindowingStrategy { // (Optional) Environment where the current window_fn should be applied in. // Runner that executes the pipeline may choose to override this if needed. // If not specified, environment will be decided by the runner. 
- optional string environment_id = 11; + string environment_id = 11; } // Whether or not a PCollection's WindowFn is non-merging, merging, or @@ -1297,10 +1297,10 @@ message Trigger { message AfterEndOfWindow { // (Optional) A trigger governing output prior to the end of the window. - optional Trigger early_firings = 1; + Trigger early_firings = 1; // (Optional) A trigger governing output after the end of the window. - optional Trigger late_firings = 2; + Trigger late_firings = 2; } // After input arrives, ready when the specified delay has passed. @@ -1481,7 +1481,7 @@ message ArtifactUrlPayload { string url = 1; // (Optional) The hex-encoded sha256 checksum of the artifact if available. - optional string sha256 = 2; + string sha256 = 2; } message EmbeddedFilePayload { @@ -1503,7 +1503,7 @@ message MavenPayload { string artifact = 1; // (Optional) Repository URL. If not specified, Maven central is used by default. - optional string repository_url = 2; + string repository_url = 2; } message DeferredArtifactPayload { @@ -1765,7 +1765,7 @@ message FunctionSpec { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - optional bytes payload = 3; + bytes payload = 3; } // A set of well known URNs describing display data. @@ -1829,7 +1829,7 @@ message DisplayData { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - optional bytes payload = 2; + bytes payload = 2; } @@ -1846,7 +1846,7 @@ message MessageWithComponents { // // If this is absent, it is expected that there are no // references. - optional Components components = 1; + Components components = 1; // (Required) The root message that may contain pointers // that should be resolved by looking inside components. 
@@ -1960,7 +1960,7 @@ message ExecutableStagePayload { // value using the beam:coder:windowed_value:v1 coder parameterized by // a beam:coder:bytes:v1 element coder and the window coder that this // param_windowed_value coder uses. - optional bytes payload = 2; + bytes payload = 2; // (Required) The target(PCollection or Timer) this setting applies to. oneof target { diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto index e84a8b0d929f..46fb3d5d3b1c 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/endpoints.proto @@ -36,7 +36,7 @@ message ApiServiceDescriptor { // (Optional) The method for authentication. If unspecified, access to the // url is already being performed in a trusted context (e.g. localhost, // private network). - optional AuthenticationSpec authentication = 2; + AuthenticationSpec authentication = 2; } message AuthenticationSpec { @@ -48,7 +48,7 @@ message AuthenticationSpec { // (Optional) The data specifying any parameters to the URN. If // the URN does not require any arguments, this may be omitted. - optional bytes payload = 2; + bytes payload = 2; } // TODO: Add authentication specifications as needed. diff --git a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto index 411c28fe7ff9..6e05aada21f9 100644 --- a/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto +++ b/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/v1/schema.proto @@ -47,7 +47,7 @@ message Field { // REQUIRED. Name of this field within the schema. string name = 1; // OPTIONAL. Human readable description of this field, such as the query that generated it. 
- optional string description = 2; + string description = 2; FieldType type = 3; int32 id = 4; @@ -58,7 +58,7 @@ message Field { // If no fields have encoding position populated the order of encoding is the same as the order in the Schema. // If this Field is part of a Schema where encoding_positions_set is True then encoding_position must be // defined, otherwise this field is ignored. - optional int32 encoding_position = 5; + int32 encoding_position = 5; repeated Option options = 6; } From 7b4d9f5bf7ea4cde8ce1a538436922037971f21d Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 24 Feb 2023 03:39:58 +0000 Subject: [PATCH 36/64] add relax_strict_optional_primitives to mypy_out --- sdks/python/apache_beam/transforms/external.py | 2 +- sdks/python/gen_protos.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 1c4a6dd05197..3ab0b87b09ed 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -595,7 +595,7 @@ def expand(self, pvalueish): components = context.to_runner_api() request = beam_expansion_api_pb2.ExpansionRequest( components=components, - namespace=self._external_namespace, # type: ignore # mypy thinks self._namespace is threading.local + namespace=self._external_namespace, transform=transform_proto, output_coder_requests=output_coders) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index e69ec32f731f..b05df35cfe63 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -512,7 +512,12 @@ def generate_proto_files(force=False): ['--proto_path=%s' % d for d in proto_dirs] + ['--python_out=%s' % PYTHON_OUTPUT_PATH] + ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + - ['--mypy_out=%s' % PYTHON_OUTPUT_PATH] + + # new version of mypy-protobuf converts None to zero default value + # and remove Optional from the param type annotation. 
This causes + # some mypy errors. So to mitigate and fall back to old behaviort, + # use `relax_strict_optional_primitives` flag. more at + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint: disable: line-tool-long + ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH] + # TODO(robertwb): Remove the prefix once it's the default. ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + proto_files) From bf78ba68988bfbf1a82513d210eb96e5eadb3d68 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 23:41:06 -0500 Subject: [PATCH 37/64] Fix up formatting --- sdks/python/apache_beam/runners/pipeline_context.py | 2 +- .../runners/portability/fn_api_runner/translations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index aa515d41ca3b..2c6181f0a0eb 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -316,7 +316,7 @@ def get_or_create_environment_with_resource_hints( # "Message"; expected "Environment" [arg-type] # Here, Environment is a subclass of Message but mypy still # throws an error. 
- cloned_env.CopyFrom(template_env) # type: ignore[arg-type] + cloned_env.CopyFrom(template_env) # type: ignore[arg-type] cloned_env.resource_hints.clear() cloned_env.resource_hints.update(resource_hints) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 154c8efdd21f..df3578e64974 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -256,7 +256,7 @@ def has_as_main_input(self, pcoll): transform.spec.payload, beam_runner_api_pb2.ParDoPayload) local_side_inputs = payload.side_inputs else: - local_side_inputs = {} # type: ignore[assignment] + local_side_inputs = {} # type: ignore[assignment] for local_id, pipeline_id in transform.inputs.items(): if pcoll == pipeline_id and local_id not in local_side_inputs: return True From b7a1807dec2066bcac436f01e2b1189c4a269952 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 23:43:33 -0500 Subject: [PATCH 38/64] Fix up lint --- sdks/python/gen_protos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index b05df35cfe63..ea249cc8e446 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -514,9 +514,9 @@ def generate_proto_files(force=False): ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + # new version of mypy-protobuf converts None to zero default value # and remove Optional from the param type annotation. This causes - # some mypy errors. So to mitigate and fall back to old behaviort, + # some mypy errors. So to mitigate and fall back to old behavior, # use `relax_strict_optional_primitives` flag. 
more at - # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint: disable: line-tool-long + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-tool-long ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH] + # TODO(robertwb): Remove the prefix once it's the default. ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + From cb8d016eea14d5e0c485fc11f898fc90571a5ddc Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 23:41:06 -0500 Subject: [PATCH 39/64] Fix up formatting --- sdks/python/apache_beam/runners/pipeline_context.py | 2 +- .../runners/portability/fn_api_runner/translations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index aa515d41ca3b..2c6181f0a0eb 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -316,7 +316,7 @@ def get_or_create_environment_with_resource_hints( # "Message"; expected "Environment" [arg-type] # Here, Environment is a subclass of Message but mypy still # throws an error. 
- cloned_env.CopyFrom(template_env) # type: ignore[arg-type] + cloned_env.CopyFrom(template_env) # type: ignore[arg-type] cloned_env.resource_hints.clear() cloned_env.resource_hints.update(resource_hints) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 154c8efdd21f..df3578e64974 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -256,7 +256,7 @@ def has_as_main_input(self, pcoll): transform.spec.payload, beam_runner_api_pb2.ParDoPayload) local_side_inputs = payload.side_inputs else: - local_side_inputs = {} # type: ignore[assignment] + local_side_inputs = {} # type: ignore[assignment] for local_id, pipeline_id in transform.inputs.items(): if pcoll == pipeline_id and local_id not in local_side_inputs: return True From 650827f539b58dc1838f1b6afd9210dd85ee5a43 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Thu, 23 Feb 2023 23:43:33 -0500 Subject: [PATCH 40/64] Fix up lint --- sdks/python/gen_protos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index b05df35cfe63..ea249cc8e446 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -514,9 +514,9 @@ def generate_proto_files(force=False): ['--plugin=protoc-gen-mypy=%s' % protoc_gen_mypy] + # new version of mypy-protobuf converts None to zero default value # and remove Optional from the param type annotation. This causes - # some mypy errors. So to mitigate and fall back to old behaviort, + # some mypy errors. So to mitigate and fall back to old behavior, # use `relax_strict_optional_primitives` flag. 
more at - # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint: disable: line-tool-long + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-tool-long ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH] + # TODO(robertwb): Remove the prefix once it's the default. ['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + From f522b38b62cdacbb2ff73aaf5254b6f9c3027adf Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 24 Feb 2023 10:40:09 -0500 Subject: [PATCH 41/64] fix lint --- sdks/python/gen_protos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index ea249cc8e446..c90c76d178ed 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -516,7 +516,7 @@ def generate_proto_files(force=False): # and remove Optional from the param type annotation. This causes # some mypy errors. So to mitigate and fall back to old behavior, # use `relax_strict_optional_primitives` flag. more at - # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-tool-long + # https://github.com/nipunn1313/mypy-protobuf/tree/main#relax_strict_optional_primitives # pylint:disable=line-too-long ['--mypy_out=relax_strict_optional_primitives:%s' % PYTHON_OUTPUT_PATH] + # TODO(robertwb): Remove the prefix once it's the default. 
['--grpc_python_out=grpc_2_0:%s' % PYTHON_OUTPUT_PATH] + From 7c4d26886c654e462083a1f461f419110a3b39a6 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 24 Feb 2023 10:50:11 -0500 Subject: [PATCH 42/64] Fix up docs --- sdks/python/apache_beam/ml/gcp/naturallanguageml.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py index ecc4bfdafd85..fc93f0350741 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py @@ -92,13 +92,11 @@ def AnnotateText( https://cloud.google.com/natural-language/docs. Args: - pcoll (:class:`~apache_beam.pvalue.PCollection`): An input PCollection of - :class:`Document` objects. - features (`Union[Mapping[str, bool], - language_v1.AnnotateTextRequest.Features]`): - A dictionary of natural language operations to be performed on given - text in the following format:: + pcoll (:class:`~apache_beam.pvalue.PCollection`): An input PCollection + of :class:`Document` objects. 
+ features: A dictionary of natural language operations to be performed + on given text in the following format:: {'extact_syntax'=True, 'extract_entities'=True} timeout (`Optional[float]`): The amount of time, in seconds, to wait From 297b7f4e5ac0823e4b5408e9dd6d4a0aaf913bc4 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 24 Feb 2023 11:41:41 -0500 Subject: [PATCH 43/64] Add gh issue to update code and doc strings --- .../apache_beam/io/gcp/datastore/v1new/datastoreio_test.py | 2 ++ sdks/python/apache_beam/ml/gcp/__init__.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py index 40a0cc98026f..3aaa658f79e6 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py @@ -49,6 +49,8 @@ client = None +# TODO(https://github.com/apache/beam/issues/25625) +# remove this FakeMutation and replace it with proto.Message class. class FakeMutation(object): def __init__(self, entity=None, key=None): """Fake mutation request object. diff --git a/sdks/python/apache_beam/ml/gcp/__init__.py b/sdks/python/apache_beam/ml/gcp/__init__.py index cce3acad34a4..523e689e347e 100644 --- a/sdks/python/apache_beam/ml/gcp/__init__.py +++ b/sdks/python/apache_beam/ml/gcp/__init__.py @@ -14,3 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +# TODO(https://github.com/apache/beam/issues/25625) +# update relevant code -> example: use class something(beam.PTransform) +# instead of decorator @beam.ptransform_fn on a function. + +# update type annotations and doc strings. 
From 83c4477e3178540955c70e237bb6b9f25ca750e3 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 24 Feb 2023 11:42:14 -0500 Subject: [PATCH 44/64] Update lower bound of gcp dependencies --- sdks/python/setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 5b8504603734..e9a51277d1cf 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -296,14 +296,14 @@ def get_portability_package_data(): # https://github.com/googleapis/google-cloud-python/issues/10566 'google-auth>=1.18.0,<3', 'google-auth-httplib2>=0.1.0,<0.2.0', - 'google-cloud-datastore>=1.8.0,<3', + 'google-cloud-datastore>=2.0.0,<3', 'google-cloud-pubsub>=2.1.0,<3', 'google-cloud-pubsublite>=1.2.0,<2', # GCP packages required by tests - 'google-cloud-bigquery>=1.6.0,<4', + 'google-cloud-bigquery>=2.0.0,<4', 'google-cloud-bigquery-storage>=2.6.3,<3', - 'google-cloud-core>=0.28.1,<3', - 'google-cloud-bigtable>=0.31.1,<3', + 'google-cloud-core>=2.0.0,<3', + 'google-cloud-bigtable>=2.0.0,<3', 'google-cloud-spanner>=3.0.0,<4', # GCP Packages required by ML functionality 'google-cloud-dlp>=3.0.0,<4', From adf34fbef193334dc7b79389def63622f35deaad Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Tue, 7 Mar 2023 14:00:24 -0500 Subject: [PATCH 45/64] Add tensorflow rc --- sdks/python/container/base_image_requirements_manual.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index 3722841507f2..5ebe635b13ce 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -39,4 +39,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy # Optimizes execution of some Beam codepaths. 
scipy scikit-learn -protobuf +tensorflow==2.12.0rc0 From adb7d30429dd9e97489cbc6870ef306626ceaa90 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 8 Mar 2023 13:01:32 -0500 Subject: [PATCH 46/64] Add tf rc version and comment onnx tests --- .../base_image_requirements_manual.txt | 2 +- sdks/python/test-suites/tox/py38/build.gradle | 14 ++------ sdks/python/tox.ini | 36 +++++++++---------- 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index 5ebe635b13ce..b7947e58b53a 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -39,4 +39,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy # Optimizes execution of some Beam codepaths. scipy scikit-learn -tensorflow==2.12.0rc0 +tensorflow==2.12.0rc1 diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index 7d582bd89c1a..97954929a88e 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -107,17 +107,9 @@ toxTask "testPy38onnx-113", "py38-onnx-113", "${posargs}" test.dependsOn "testPy38onnx-113" preCommitPyCoverage.dependsOn "testPy38onnx-113" // Create a test task for each minor version of tensorflow -toxTask "testPy38tensorflow-29", "py38-tensorflow-29", "${posargs}" -test.dependsOn "testPy38tensorflow-29" -preCommitPyCoverage.dependsOn "testPy38tensorflow-29" - -toxTask "testPy38tensorflow-210", "py38-tensorflow-210", "${posargs}" -test.dependsOn "testPy38tensorflow-210" -preCommitPyCoverage.dependsOn "testPy38tensorflow-210" - -toxTask "testPy38tensorflow-211", "py38-tensorflow-211", "${posargs}" -test.dependsOn "testPy38tensorflow-211" -preCommitPyCoverage.dependsOn "testPy38tensorflow-211" +toxTask "testPy38tensorflow-212", "py38-tensorflow-212", "${posargs}" 
+test.dependsOn "testPy38tensorflow-212" +preCommitPyCoverage.dependsOn "testPy38tensorflow-212" toxTask "whitespacelint", "whitespacelint", "${posargs}" diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index f77c15fb8803..9dfaa38ea967 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -320,31 +320,29 @@ commands = # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' -[testenv:py{37,38,39,310}-onnx-113] +# [testenv:py{37,38,39,310}-onnx-113] # TODO(https://github.com/apache/beam/issues/25443) # apparently tox has problem when substitution key has single value. Change back to -onnx-{113,...} # when multiple onnx versions are tested. -deps = - onnxruntime==1.13.1 - pandas==1.5.2 - torch==1.13.1 - tensorflow==2.11.0 - tf2onnx==1.13.0 - skl2onnx==1.13 - transformers==4.25.1 -extras = test,gcp -commands = - # Log onnx version for debugging - /bin/sh -c "pip freeze | grep -E onnx" - # Run all ONNX unit tests - pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} +# deps = +# onnxruntime==1.13.1 +# pandas==1.5.2 +# torch==1.13.1 +# tensorflow==2.11.0 +# tf2onnx==1.13.0 +# skl2onnx==1.13 +# transformers==4.25.1 +# extras = test,gcp +# commands = +# # Log onnx version for debugging +# /bin/sh -c "pip freeze | grep -E onnx" +# # Run all ONNX unit tests +# pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} -[testenv:py{37,38,39,310}-tensorflow-{29,210,211}] +[testenv:py{37,38,39,310}-tensorflow-{212}] deps = -r build-requirements.txt - 29: tensorflow>=2.9.0,<2.10.0 - 210: tensorflow>=2.10.0,<2.11.0 - 211: tensorflow>=2.11.0,<2.12.0 + 212: tensorflow>=2.12rc1,<2.13 extras = test,gcp commands = # Log tensorflow version for debugging From 
0070723549bc53e79055dfb204726977168dc898 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 8 Mar 2023 13:39:57 -0500 Subject: [PATCH 47/64] Update container dependencies --- .../base_image_requirements_manual.txt | 2 +- .../py310/base_image_requirements.txt | 69 ++++++++++++------ .../py37/base_image_requirements.txt | 46 ++++++------ .../py38/base_image_requirements.txt | 71 +++++++++++++------ .../py39/base_image_requirements.txt | 71 +++++++++++++------ 5 files changed, 166 insertions(+), 93 deletions(-) diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index b7947e58b53a..71cd6c2dcb97 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -39,4 +39,4 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy # Optimizes execution of some Beam codepaths. scipy scikit-learn -tensorflow==2.12.0rc1 +tensorflow>=2.12.0rc1;python_version>="3.8" diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 430a26a725c6..456c08fcf7f7 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -21,17 +21,19 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
+absl-py==1.4.0 +astunparse==1.6.3 attrs==22.2.0 beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.1 +cryptography==39.0.2 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -39,49 +41,61 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.1 +fastavro==1.7.2 fasteners==0.18 +flatbuffers==23.3.3 freezegun==1.2.2 future==0.18.3 +gast==0.4.0 google-api-core==2.11.0 google-apitools==0.5.31 -google-auth==2.16.1 +google-auth==2.16.2 google-auth-httplib2==0.1.0 -google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.18.1 -google-cloud-bigtable==2.15.0 +google-auth-oauthlib==0.4.6 +google-cloud-bigquery==3.6.0 +google-cloud-bigquery-storage==2.19.0 +google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.13.2 -google-cloud-dlp==3.11.1 +google-cloud-datastore==2.14.0 +google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 -google-cloud-pubsub==2.14.1 +google-cloud-pubsub==2.15.0 google-cloud-pubsublite==1.7.0 -google-cloud-recommendations-ai==0.10.1 -google-cloud-spanner==3.27.1 -google-cloud-videointelligence==2.10.1 -google-cloud-vision==3.3.1 +google-cloud-recommendations-ai==0.10.2 +google-cloud-spanner==3.28.0 +google-cloud-videointelligence==2.11.0 +google-cloud-vision==3.4.0 google-crc32c==1.5.0 +google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.51.1 -grpcio-status==1.51.1 +grpcio==1.51.3 +grpcio-status==1.51.3 guppy3==3.1.2 +h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 +jax==0.4.5 joblib==1.2.0 +keras==2.12.0rc1 +libclang==15.0.6.1 +Markdown==3.4.1 +MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.2 +numpy==1.23.5 oauth2client==4.1.3 +oauthlib==3.2.2 objsize==0.6.1 -orjson==3.8.6 
+opt-einsum==3.3.0 +orjson==3.8.7 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -89,7 +103,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==4.22.0 +protobuf==4.22.1 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -101,7 +115,7 @@ PyHamcrest==1.10.1 pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 -pytest==7.2.1 +pytest==7.2.2 pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 @@ -112,21 +126,30 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 +requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 -scipy==1.10.0 +scipy==1.10.1 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 +tensorboard==2.12.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.12.0rc1 +tensorflow-estimator==2.12.0rc0 +tensorflow-io-gcs-filesystem==0.31.0 +termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 -tqdm==4.64.1 +tqdm==4.65.0 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 +Werkzeug==2.2.3 wrapt==1.14.1 -zstandard==0.19.0 +zstandard==0.20.0 diff --git a/sdks/python/container/py37/base_image_requirements.txt b/sdks/python/container/py37/base_image_requirements.txt index c89a571ed28f..acc7ea85c588 100644 --- a/sdks/python/container/py37/base_image_requirements.txt +++ b/sdks/python/container/py37/base_image_requirements.txt @@ -27,11 +27,11 @@ bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.1 +cryptography==39.0.2 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -39,34 +39,34 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.1 +fastavro==1.7.2 fasteners==0.18 freezegun==1.2.2 future==0.18.3 google-api-core==2.11.0 google-apitools==0.5.31 -google-auth==2.16.1 +google-auth==2.16.2 google-auth-httplib2==0.1.0 -google-cloud-bigquery==3.5.0 
-google-cloud-bigquery-storage==2.18.1 -google-cloud-bigtable==2.15.0 +google-cloud-bigquery==3.6.0 +google-cloud-bigquery-storage==2.19.0 +google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.13.2 -google-cloud-dlp==3.11.1 +google-cloud-datastore==2.14.0 +google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 -google-cloud-pubsub==2.14.1 +google-cloud-pubsub==2.15.0 google-cloud-pubsublite==1.7.0 -google-cloud-recommendations-ai==0.10.1 -google-cloud-spanner==3.27.1 -google-cloud-videointelligence==2.10.1 -google-cloud-vision==3.3.1 +google-cloud-recommendations-ai==0.10.2 +google-cloud-spanner==3.28.0 +google-cloud-videointelligence==2.11.0 +google-cloud-vision==3.4.0 google-crc32c==1.5.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.51.1 -grpcio-status==1.51.1 +grpcio==1.51.3 +grpcio-status==1.51.3 guppy3==3.1.2 hdfs==2.7.0 httplib2==0.21.0 @@ -82,7 +82,7 @@ nose==1.3.7 numpy==1.21.6 oauth2client==4.1.3 objsize==0.6.1 -orjson==3.8.6 +orjson==3.8.7 overrides==6.5.0 packaging==23.0 pandas==1.3.5 @@ -90,7 +90,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==4.22.0 +protobuf==4.22.1 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -102,7 +102,7 @@ PyHamcrest==1.10.1 pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 -pytest==7.2.1 +pytest==7.2.2 pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 @@ -125,10 +125,10 @@ tenacity==5.1.5 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 -tqdm==4.64.1 +tqdm==4.65.0 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 -wrapt==1.14.1 -zipp==3.14.0 -zstandard==0.19.0 +wrapt==1.15.0 +zipp==3.15.0 +zstandard==0.20.0 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index 31a95bc4e974..fc892f79de54 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ 
b/sdks/python/container/py38/base_image_requirements.txt @@ -21,17 +21,19 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. +absl-py==1.4.0 +astunparse==1.6.3 attrs==22.2.0 beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.1 +cryptography==39.0.2 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -39,49 +41,62 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.1 +fastavro==1.7.2 fasteners==0.18 +flatbuffers==23.3.3 freezegun==1.2.2 future==0.18.3 +gast==0.4.0 google-api-core==2.11.0 google-apitools==0.5.31 -google-auth==2.16.1 +google-auth==2.16.2 google-auth-httplib2==0.1.0 -google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.18.1 -google-cloud-bigtable==2.15.0 +google-auth-oauthlib==0.4.6 +google-cloud-bigquery==3.6.0 +google-cloud-bigquery-storage==2.19.0 +google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.13.2 -google-cloud-dlp==3.11.1 +google-cloud-datastore==2.14.0 +google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 -google-cloud-pubsub==2.14.1 +google-cloud-pubsub==2.15.0 google-cloud-pubsublite==1.7.0 -google-cloud-recommendations-ai==0.10.1 -google-cloud-spanner==3.27.1 -google-cloud-videointelligence==2.10.1 -google-cloud-vision==3.3.1 +google-cloud-recommendations-ai==0.10.2 +google-cloud-spanner==3.28.0 +google-cloud-videointelligence==2.11.0 +google-cloud-vision==3.4.0 google-crc32c==1.5.0 +google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.51.1 -grpcio-status==1.51.1 +grpcio==1.51.3 +grpcio-status==1.51.3 guppy3==3.1.2 +h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 hypothesis==6.68.2 idna==3.4 +importlib-metadata==6.0.0 iniconfig==2.0.0 +jax==0.4.5 joblib==1.2.0 +keras==2.12.0rc1 +libclang==15.0.6.1 
+Markdown==3.4.1 +MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.2 +numpy==1.23.5 oauth2client==4.1.3 +oauthlib==3.2.2 objsize==0.6.1 -orjson==3.8.6 +opt-einsum==3.3.0 +orjson==3.8.7 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -89,7 +104,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==4.22.0 +protobuf==4.22.1 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -101,7 +116,7 @@ PyHamcrest==1.10.1 pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 -pytest==7.2.1 +pytest==7.2.2 pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 @@ -112,21 +127,31 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 +requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 -scipy==1.10.0 +scipy==1.10.1 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 +tensorboard==2.12.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.12.0rc1 +tensorflow-estimator==2.12.0rc0 +tensorflow-io-gcs-filesystem==0.31.0 +termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 -tqdm==4.64.1 +tqdm==4.65.0 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 +Werkzeug==2.2.3 wrapt==1.14.1 -zstandard==0.19.0 +zipp==3.15.0 +zstandard==0.20.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index 2bf4c9e6ea0f..d05a0e52ecf1 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -21,17 +21,19 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
+absl-py==1.4.0 +astunparse==1.6.3 attrs==22.2.0 beautifulsoup4==4.11.2 bs4==0.0.1 cachetools==4.2.4 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 click==8.1.3 cloudpickle==2.2.1 crcmod==1.7 -cryptography==39.0.1 +cryptography==39.0.2 Cython==0.29.33 deprecation==2.1.0 dill==0.3.1.1 @@ -39,49 +41,62 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.1 +fastavro==1.7.2 fasteners==0.18 +flatbuffers==23.3.3 freezegun==1.2.2 future==0.18.3 +gast==0.4.0 google-api-core==2.11.0 google-apitools==0.5.31 -google-auth==2.16.1 +google-auth==2.16.2 google-auth-httplib2==0.1.0 -google-cloud-bigquery==3.5.0 -google-cloud-bigquery-storage==2.18.1 -google-cloud-bigtable==2.15.0 +google-auth-oauthlib==0.4.6 +google-cloud-bigquery==3.6.0 +google-cloud-bigquery-storage==2.19.0 +google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.13.2 -google-cloud-dlp==3.11.1 +google-cloud-datastore==2.14.0 +google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 -google-cloud-pubsub==2.14.1 +google-cloud-pubsub==2.15.0 google-cloud-pubsublite==1.7.0 -google-cloud-recommendations-ai==0.10.1 -google-cloud-spanner==3.27.1 -google-cloud-videointelligence==2.10.1 -google-cloud-vision==3.3.1 +google-cloud-recommendations-ai==0.10.2 +google-cloud-spanner==3.28.0 +google-cloud-videointelligence==2.11.0 +google-cloud-vision==3.4.0 google-crc32c==1.5.0 +google-pasta==0.2.0 google-resumable-media==2.4.1 googleapis-common-protos==1.58.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.51.1 -grpcio-status==1.51.1 +grpcio==1.51.3 +grpcio-status==1.51.3 guppy3==3.1.2 +h5py==3.8.0 hdfs==2.7.0 httplib2==0.21.0 hypothesis==6.68.2 idna==3.4 +importlib-metadata==6.0.0 iniconfig==2.0.0 +jax==0.4.5 joblib==1.2.0 +keras==2.12.0rc1 +libclang==15.0.6.1 +Markdown==3.4.1 +MarkupSafe==2.1.2 mmh3==3.0.0 mock==2.0.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.2 +numpy==1.23.5 oauth2client==4.1.3 +oauthlib==3.2.2 objsize==0.6.1 
-orjson==3.8.6 +opt-einsum==3.3.0 +orjson==3.8.7 overrides==6.5.0 packaging==23.0 pandas==1.5.3 @@ -89,7 +104,7 @@ parameterized==0.8.1 pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 -protobuf==4.22.0 +protobuf==4.22.1 psycopg2-binary==2.9.5 py==1.11.0 pyarrow==9.0.0 @@ -101,7 +116,7 @@ PyHamcrest==1.10.1 pymongo==3.13.0 PyMySQL==1.0.2 pyparsing==3.0.9 -pytest==7.2.1 +pytest==7.2.2 pytest-forked==1.6.0 pytest-timeout==2.1.0 pytest-xdist==2.5.0 @@ -112,21 +127,31 @@ PyYAML==6.0 regex==2022.10.31 requests==2.28.2 requests-mock==1.10.0 +requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.2.1 -scipy==1.10.0 +scipy==1.10.1 six==1.16.0 sortedcontainers==2.4.0 soupsieve==2.4 SQLAlchemy==1.4.46 sqlparse==0.4.3 tenacity==5.1.5 +tensorboard==2.12.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.12.0rc1 +tensorflow-estimator==2.12.0rc0 +tensorflow-io-gcs-filesystem==0.31.0 +termcolor==2.2.0 testcontainers==3.7.1 threadpoolctl==3.1.0 tomli==2.0.1 -tqdm==4.64.1 +tqdm==4.65.0 typing_extensions==4.5.0 urllib3==1.26.14 websocket-client==1.5.1 +Werkzeug==2.2.3 wrapt==1.14.1 -zstandard==0.19.0 +zipp==3.15.0 +zstandard==0.20.0 From c542de569b73207cfcc518473bba20b6f5ea5bc9 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Wed, 8 Mar 2023 15:38:26 -0500 Subject: [PATCH 48/64] comment onnx task --- sdks/python/test-suites/tox/py38/build.gradle | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index 97954929a88e..a5439e5d4cd9 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -103,9 +103,9 @@ test.dependsOn "testPy38pytorch-113" preCommitPyCoverage.dependsOn "testPy38pytorch-113" // Create a test task for each minor version of onnx -toxTask "testPy38onnx-113", "py38-onnx-113", "${posargs}" -test.dependsOn "testPy38onnx-113" -preCommitPyCoverage.dependsOn "testPy38onnx-113" +// toxTask 
"testPy38onnx-113", "py38-onnx-113", "${posargs}" +// test.dependsOn "testPy38onnx-113" +// preCommitPyCoverage.dependsOn "testPy38onnx-113" // Create a test task for each minor version of tensorflow toxTask "testPy38tensorflow-212", "py38-tensorflow-212", "${posargs}" test.dependsOn "testPy38tensorflow-212" From 2f94ddd5f54140cbeebeaf2d9ac1b8cae50cec95 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 12:18:49 -0500 Subject: [PATCH 49/64] Change bound on google-cloud-recommendations --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index c78c28ba840f..02d127650f58 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -311,7 +311,7 @@ def get_portability_package_data(): 'google-cloud-language>=2.0,<3', 'google-cloud-videointelligence>=2.0,<3', 'google-cloud-vision>=2,<4', - 'google-cloud-recommendations-ai>=0.1.0,<1' + 'google-cloud-recommendations-ai>=0.1.0,<0.11' ], 'interactive': [ 'facets-overview>=1.1.0,<2', From 5bafc657aa807908d4609da7389ab83cdf31f4e2 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 12:25:45 -0500 Subject: [PATCH 50/64] Modify FakeMutation to support mocking proto messages --- .../gcp/datastore/v1new/datastoreio_test.py | 23 ++++++++++++------- .../io/gcp/datastore/v1new/util.py | 17 +------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py index 3aaa658f79e6..5f29c52f0ce7 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py @@ -49,8 +49,20 @@ client = None -# TODO(https://github.com/apache/beam/issues/25625) -# remove this FakeMutation and replace it with proto.Message class. 
+# used for internal testing only +class ByteSize(): + def __init__(self, entity, key): + self.entity = entity + self.key = key + + def ByteSize(self): + if self.entity is not None: + return util.extract_byte_size(helpers.entity_to_protobuf(self.entity)) + else: + return util.extract_byte_size(self.key.to_protobuf()) + + +# used for internal testing only class FakeMutation(object): def __init__(self, entity=None, key=None): """Fake mutation request object. @@ -65,12 +77,7 @@ def __init__(self, entity=None, key=None): """ self.entity = entity self.key = key - - def ByteSize(self): - if self.entity is not None: - return util.extract_byte_size(helpers.entity_to_protobuf(self.entity)) - else: - return util.extract_byte_size(self.key.to_protobuf()) + self._pb = ByteSize(entity, key) class FakeBatch(object): diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py index b9e55ffc87fa..c3d83611c304 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py @@ -140,19 +140,4 @@ def report_latency(self, now, latency_ms, num_mutations): def extract_byte_size(proto_message): - """ - Gets the byte size from a google.protobuf or proto-plus message - - google-cloud-datastore moved from using protobuf to using - proto-plus messages. - protobuf object has attribute ByteSize() but proto.Message() objects - don't. 
Workaround: - https://github.com/googleapis/proto-plus-python/issues/163 - """ - if hasattr(proto_message, "ByteSize"): - # google.protobuf message - return proto_message.ByteSize() - if hasattr(type(proto_message), "pb"): - # proto-plus message - return type(proto_message).pb(proto_message).ByteSize() - return NotImplementedError + return proto_message._pb.ByteSize() From dd6dbec40983132d348ca72fa9040137159cc9bf Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 12:39:36 -0500 Subject: [PATCH 51/64] Update comment --- sdks/python/apache_beam/ml/gcp/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/ml/gcp/__init__.py b/sdks/python/apache_beam/ml/gcp/__init__.py index 523e689e347e..63e52f1d1f17 100644 --- a/sdks/python/apache_beam/ml/gcp/__init__.py +++ b/sdks/python/apache_beam/ml/gcp/__init__.py @@ -16,7 +16,10 @@ # # TODO(https://github.com/apache/beam/issues/25625) -# update relevant code -> example: use class something(beam.PTransform) -# instead of decorator @beam.ptransform_fn on a function. -# update type annotations and doc strings. +# update code. For example, +# use class something(beam.PTransform) instead of decorator @beam.ptransform_fn +# on a function. + +# update type annotations to follow pep 484 https://peps.python.org/pep-0484/ +# and also update doc strings. 
From bc58086f250591dfc81b5306a09f3c338d02ad0e Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 12:45:15 -0500 Subject: [PATCH 52/64] Add todo for uncommenting onnx changes --- sdks/python/test-suites/tox/py38/build.gradle | 1 + sdks/python/tox.ini | 1 + 2 files changed, 2 insertions(+) diff --git a/sdks/python/test-suites/tox/py38/build.gradle b/sdks/python/test-suites/tox/py38/build.gradle index a5439e5d4cd9..16b9297e7ca6 100644 --- a/sdks/python/test-suites/tox/py38/build.gradle +++ b/sdks/python/test-suites/tox/py38/build.gradle @@ -102,6 +102,7 @@ toxTask "testPy38pytorch-113", "py38-pytorch-113", "${posargs}" test.dependsOn "testPy38pytorch-113" preCommitPyCoverage.dependsOn "testPy38pytorch-113" +// TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task once onnx supports protobuf 4.x.x // Create a test task for each minor version of onnx // toxTask "testPy38onnx-113", "py38-onnx-113", "${posargs}" // test.dependsOn "testPy38onnx-113" diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index a87d617843a6..9a2309f83cb0 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -320,6 +320,7 @@ commands = # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. /bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' +# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task once onnx supports protobuf 4.x.x # [testenv:py{37,38,39,310}-onnx-113] # TODO(https://github.com/apache/beam/issues/25443) # apparently tox has problem when substitution key has single value. 
Change back to -onnx-{113,...} From e35bdef27c439305a0813dd1975d5e626fc2304d Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 12:51:40 -0500 Subject: [PATCH 53/64] update comment --- sdks/python/apache_beam/runners/pipeline_context.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index 2c6181f0a0eb..12a1e2c6211b 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -310,8 +310,9 @@ def get_or_create_environment_with_resource_hints( """Creates an environment that has necessary hints and returns its id.""" template_env = self.environments.get_proto_from_id(template_env_id) cloned_env = beam_runner_api_pb2.Environment() + # (TODO https://github.com/apache/beam/issues/25615) # Remove the suppress warning for type once mypy is updated to - # newer version. https://github.com/apache/beam/issues/25615 + # newer version. mypy 0.790 throws the warning below but 0.941 doesn't. 
# error: Argument 1 to "CopyFrom" of "Message" has incompatible type # "Message"; expected "Environment" [arg-type] # Here, Environment is a subclass of Message but mypy still From b617f6085f6943b9533c49ef97009cc7bfcfbbe8 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 13:17:17 -0500 Subject: [PATCH 54/64] Update dependencies --- .../container/py310/base_image_requirements.txt | 11 +++++------ .../python/container/py37/base_image_requirements.txt | 7 +++---- .../python/container/py38/base_image_requirements.txt | 11 +++++------ .../python/container/py39/base_image_requirements.txt | 11 +++++------ 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 456c08fcf7f7..35dd2b960603 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -41,7 +41,7 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.2 +fastavro==1.7.3 fasteners==0.18 flatbuffers==23.3.3 freezegun==1.2.2 @@ -56,7 +56,7 @@ google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.14.0 +google-cloud-datastore==2.15.0 google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 google-cloud-pubsub==2.15.0 @@ -80,14 +80,14 @@ httplib2==0.21.0 hypothesis==6.68.2 idna==3.4 iniconfig==2.0.0 -jax==0.4.5 +jax==0.4.6 joblib==1.2.0 keras==2.12.0rc1 libclang==15.0.6.1 Markdown==3.4.1 MarkupSafe==2.1.2 mmh3==3.0.0 -mock==2.0.0 +mock==5.0.1 nltk==3.8.1 nose==1.3.7 numpy==1.23.5 @@ -100,7 +100,6 @@ overrides==6.5.0 packaging==23.0 pandas==1.5.3 parameterized==0.8.1 -pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 protobuf==4.22.1 @@ -128,7 +127,7 @@ requests==2.28.2 requests-mock==1.10.0 requests-oauthlib==1.3.1 rsa==4.9 -scikit-learn==1.2.1 +scikit-learn==1.2.2 scipy==1.10.1 
six==1.16.0 sortedcontainers==2.4.0 diff --git a/sdks/python/container/py37/base_image_requirements.txt b/sdks/python/container/py37/base_image_requirements.txt index acc7ea85c588..e57e27888ffa 100644 --- a/sdks/python/container/py37/base_image_requirements.txt +++ b/sdks/python/container/py37/base_image_requirements.txt @@ -39,7 +39,7 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.2 +fastavro==1.7.3 fasteners==0.18 freezegun==1.2.2 future==0.18.3 @@ -51,7 +51,7 @@ google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.14.0 +google-cloud-datastore==2.15.0 google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 google-cloud-pubsub==2.15.0 @@ -76,7 +76,7 @@ importlib-metadata==6.0.0 iniconfig==2.0.0 joblib==1.2.0 mmh3==3.0.0 -mock==2.0.0 +mock==5.0.1 nltk==3.8.1 nose==1.3.7 numpy==1.21.6 @@ -87,7 +87,6 @@ overrides==6.5.0 packaging==23.0 pandas==1.3.5 parameterized==0.8.1 -pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 protobuf==4.22.1 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index fc892f79de54..af2a4c13d349 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -41,7 +41,7 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.2 +fastavro==1.7.3 fasteners==0.18 flatbuffers==23.3.3 freezegun==1.2.2 @@ -56,7 +56,7 @@ google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.14.0 +google-cloud-datastore==2.15.0 google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 google-cloud-pubsub==2.15.0 @@ -81,14 +81,14 @@ hypothesis==6.68.2 idna==3.4 importlib-metadata==6.0.0 iniconfig==2.0.0 -jax==0.4.5 +jax==0.4.6 joblib==1.2.0 keras==2.12.0rc1 libclang==15.0.6.1 Markdown==3.4.1 
MarkupSafe==2.1.2 mmh3==3.0.0 -mock==2.0.0 +mock==5.0.1 nltk==3.8.1 nose==1.3.7 numpy==1.23.5 @@ -101,7 +101,6 @@ overrides==6.5.0 packaging==23.0 pandas==1.5.3 parameterized==0.8.1 -pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 protobuf==4.22.1 @@ -129,7 +128,7 @@ requests==2.28.2 requests-mock==1.10.0 requests-oauthlib==1.3.1 rsa==4.9 -scikit-learn==1.2.1 +scikit-learn==1.2.2 scipy==1.10.1 six==1.16.0 sortedcontainers==2.4.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index d05a0e52ecf1..153a8c2e1db9 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -41,7 +41,7 @@ docker==6.0.1 docopt==0.6.2 exceptiongroup==1.1.0 execnet==1.9.0 -fastavro==1.7.2 +fastavro==1.7.3 fasteners==0.18 flatbuffers==23.3.3 freezegun==1.2.2 @@ -56,7 +56,7 @@ google-cloud-bigquery==3.6.0 google-cloud-bigquery-storage==2.19.0 google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 -google-cloud-datastore==2.14.0 +google-cloud-datastore==2.15.0 google-cloud-dlp==3.12.0 google-cloud-language==2.9.0 google-cloud-pubsub==2.15.0 @@ -81,14 +81,14 @@ hypothesis==6.68.2 idna==3.4 importlib-metadata==6.0.0 iniconfig==2.0.0 -jax==0.4.5 +jax==0.4.6 joblib==1.2.0 keras==2.12.0rc1 libclang==15.0.6.1 Markdown==3.4.1 MarkupSafe==2.1.2 mmh3==3.0.0 -mock==2.0.0 +mock==5.0.1 nltk==3.8.1 nose==1.3.7 numpy==1.23.5 @@ -101,7 +101,6 @@ overrides==6.5.0 packaging==23.0 pandas==1.5.3 parameterized==0.8.1 -pbr==5.11.1 pluggy==1.0.0 proto-plus==1.22.2 protobuf==4.22.1 @@ -129,7 +128,7 @@ requests==2.28.2 requests-mock==1.10.0 requests-oauthlib==1.3.1 rsa==4.9 -scikit-learn==1.2.1 +scikit-learn==1.2.2 scipy==1.10.1 six==1.16.0 sortedcontainers==2.4.0 From 334846aa10323d8ebcbe1a03cddc211f8490e1c0 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 13:55:06 -0500 Subject: [PATCH 55/64] revert doc change --- 
sdks/python/apache_beam/ml/gcp/naturallanguageml.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py index fc93f0350741..4f63aef68232 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py @@ -92,11 +92,11 @@ def AnnotateText( https://cloud.google.com/natural-language/docs. Args: - pcoll (:class:`~apache_beam.pvalue.PCollection`): An input PCollection - of :class:`Document` objects. - - features: A dictionary of natural language operations to be performed - on given text in the following format:: + pcoll (:class:`~apache_beam.pvalue.PCollection`): An input PCollection of + :class:`Document` objects. + features (`Union[Mapping[str, bool], types.AnnotateTextRequest.Features]`): + A dictionary of natural language operations to be performed on given + text in the following format:: {'extact_syntax'=True, 'extract_entities'=True} timeout (`Optional[float]`): The amount of time, in seconds, to wait From 15db416e54eff06a7cbdd7aa2e116e75c3a25955 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 14:44:09 -0500 Subject: [PATCH 56/64] Refactor code/remove helper function from util --- .../apache_beam/io/gcp/datastore/v1new/datastoreio.py | 3 +-- .../io/gcp/datastore/v1new/datastoreio_test.py | 8 ++++---- sdks/python/apache_beam/io/gcp/datastore/v1new/util.py | 4 ---- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py index ab5b836ae5e3..17568cdd7257 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py @@ -477,8 +477,7 @@ def process(self, element): client_element = self.element_to_client_batch_item(element) 
self._batch_elements.append(client_element) self.add_to_batch(client_element) - self._batch_bytes_size += util.extract_byte_size( - self._batch.mutations[-1]) + self._batch_bytes_size += self._batch.mutations[-1]._pb.ByteSize() if (len(self._batch.mutations) >= self._target_batch_size or self._batch_bytes_size > util.WRITE_BATCH_MAX_BYTES_SIZE): diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py index 5f29c52f0ce7..05d99964d416 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py @@ -50,16 +50,16 @@ # used for internal testing only -class ByteSize(): +class FakeByteSize(): def __init__(self, entity, key): self.entity = entity self.key = key def ByteSize(self): if self.entity is not None: - return util.extract_byte_size(helpers.entity_to_protobuf(self.entity)) + return helpers.entity_to_protobuf(self.entity)._pb.ByteSize() else: - return util.extract_byte_size(self.key.to_protobuf()) + return self.key.to_protobuf()._pb.ByteSize() # used for internal testing only @@ -77,7 +77,7 @@ def __init__(self, entity=None, key=None): """ self.entity = entity self.key = key - self._pb = ByteSize(entity, key) + self._pb = FakeByteSize(entity, key) class FakeBatch(object): diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py index c3d83611c304..06a22143f59d 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/util.py @@ -137,7 +137,3 @@ def report_latency(self, now, latency_ms, num_mutations): num_mutations: int, number of mutations contained in the RPC. 
""" self._commit_time_per_entity_ms.add(now, latency_ms / num_mutations) - - -def extract_byte_size(proto_message): - return proto_message._pb.ByteSize() From 2558b6b5d7d97afb4e1f729d826144c824a13fb8 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 14:49:24 -0500 Subject: [PATCH 57/64] Update comment --- sdks/python/apache_beam/runners/pipeline_context.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index 12a1e2c6211b..102b8b60d69a 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -311,8 +311,9 @@ def get_or_create_environment_with_resource_hints( template_env = self.environments.get_proto_from_id(template_env_id) cloned_env = beam_runner_api_pb2.Environment() # (TODO https://github.com/apache/beam/issues/25615) - # Remove the suppress warning for type once mypy is updated to - # newer version. mypy 0.790 throws the warning below but 0.941 doesn't. + # Remove the suppress warning for type once mypy is updated to 0.941 or + # higher. + # mypy 0.790 throws the warning below but 0.941 doesn't. # error: Argument 1 to "CopyFrom" of "Message" has incompatible type # "Message"; expected "Environment" [arg-type] # Here, Environment is a subclass of Message but mypy still From 454481ed5a6b8649918a546be28b9300e074b65b Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 14:55:22 -0500 Subject: [PATCH 58/64] Uncomment changes --- sdks/python/tox.ini | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 9a2309f83cb0..926625515603 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -320,25 +320,25 @@ commands = # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_pytorch {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' -# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task once onnx supports protobuf 4.x.x -# [testenv:py{37,38,39,310}-onnx-113] +# TODO(https://github.com/apache/beam/issues/25796) - uncomment onnx tox task in tox/py38/build.gradle once onnx supports protobuf 4.x.x +[testenv:py{37,38,39,310}-onnx-113] # TODO(https://github.com/apache/beam/issues/25443) # apparently tox has problem when substitution key has single value. Change back to -onnx-{113,...} # when multiple onnx versions are tested. -# deps = -# onnxruntime==1.13.1 -# pandas==1.5.2 -# torch==1.13.1 -# tensorflow==2.11.0 -# tf2onnx==1.13.0 -# skl2onnx==1.13 -# transformers==4.25.1 -# extras = test,gcp -# commands = -# # Log onnx version for debugging -# /bin/sh -c "pip freeze | grep -E onnx" -# # Run all ONNX unit tests -# pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} +deps = + onnxruntime==1.13.1 + pandas==1.5.2 + torch==1.13.1 + tensorflow==2.11.0 + tf2onnx==1.13.0 + skl2onnx==1.13 + transformers==4.25.1 +extras = test,gcp +commands = + # Log onnx version for debugging + /bin/sh -c "pip freeze | grep -E onnx" + # Run all ONNX unit tests + pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} [testenv:py{37,38,39,310}-tensorflow-{212}] deps = From af2de212b460a5bf7c8799453bdf8456d792c6e8 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 15:34:46 -0500 Subject: [PATCH 59/64] Update tensorflow --- .../apache_beam/ml/inference/tensorflow_tests_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index 8a9deba61dd8..817bdce05f50 100644 --- 
a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -15,7 +15,7 @@ # limitations under the License. # -tensorflow>=1.0.0 +tensorflow>=2.12rc1 tensorflow_hub>=0.10.0 Pillow>=9.0.0 From 68933983245c62dcfec22ed2595e576622bff82f Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 16:19:40 -0500 Subject: [PATCH 60/64] Comment tfx-bsl tests --- sdks/python/test-suites/direct/common.gradle | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index 80e0bf052e57..a873a1f9a10f 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -313,7 +313,9 @@ project.tasks.register("inferencePostCommitIT") { dependsOn = [ 'torchInferenceTest', 'sklearnInferenceTest', - 'tfxInferenceTest', + // (TODO) https://github.com/apache/beam/issues/25799 + // uncomment tfx bsl tests once tfx supports protobuf 4.x + // 'tfxInferenceTest', 'tensorflowInferenceTest' ] } From f72eaa3c49e429cc739a256b4b3d40cfb46218eb Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 16:53:19 -0500 Subject: [PATCH 61/64] Update tox.ini syntax --- sdks/python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 926625515603..82e5cec9b1a5 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -340,7 +340,7 @@ commands = # Run all ONNX unit tests pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} -[testenv:py{37,38,39,310}-tensorflow-{212}] +[testenv:py{37,38,39,310}-tensorflow-212] deps = -r build-requirements.txt 212: tensorflow>=2.12rc1,<2.13 From a29505f0f4e8e7c1d09f4eb48f9bb00dffedd38a Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Fri, 10 Mar 2023 16:55:17 -0500 Subject: [PATCH 62/64] refactor --- 
.../apache_beam/io/gcp/datastore/v1new/datastoreio_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py index 05d99964d416..076a95178d83 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py @@ -50,7 +50,7 @@ # used for internal testing only -class FakeByteSize(): +class FakeMessage: def __init__(self, entity, key): self.entity = entity self.key = key @@ -77,7 +77,7 @@ def __init__(self, entity=None, key=None): """ self.entity = entity self.key = key - self._pb = FakeByteSize(entity, key) + self._pb = FakeMessage(entity, key) class FakeBatch(object): From ba8b37621abb6fb6b572c482a890dff7b9c9e3ae Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Sun, 12 Mar 2023 14:18:00 -0400 Subject: [PATCH 63/64] fix requirements --- .../apache_beam/ml/inference/tensorflow_tests_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index 817bdce05f50..03dca6ca2344 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -15,7 +15,7 @@ # limitations under the License. 
# -tensorflow>=2.12rc1 +tensorflow>=2.12.0rc1 tensorflow_hub>=0.10.0 Pillow>=9.0.0 From 553407353e4d4dca20f0ceb7ff70c39bde2b4e66 Mon Sep 17 00:00:00 2001 From: Anand Inguva Date: Mon, 13 Mar 2023 10:41:18 -0400 Subject: [PATCH 64/64] Skip tensorflow tests on Python 3.7 --- .../apache_beam/ml/inference/tensorflow_inference_it_test.py | 5 +++++ sdks/python/tox.ini | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py index bdc0291dd1ed..9c814062e6ed 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_it_test.py @@ -18,6 +18,7 @@ """End-to-End test for Tensorflow Inference""" import logging +import sys import unittest import uuid from pathlib import Path @@ -66,6 +67,10 @@ def clear_tf_hub_temp_dir(model_path): rmdir(local_path) +@unittest.skipIf( + sys.version_info.major == 3 and sys.version_info.minor == 7, + "Tensorflow tests on Python 3.7 with Apache Beam 2.47.0 or " + "greater are skipped since tensorflow>=2.12 doesn't support Python 3.7") @unittest.skipIf( tf is None, 'Missing dependencies. ' 'Test depends on tensorflow') diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 82e5cec9b1a5..db0dc55ca623 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -340,7 +340,7 @@ commands = # Run all ONNX unit tests pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_onnx {posargs} -[testenv:py{37,38,39,310}-tensorflow-212] +[testenv:py{38,39,310}-tensorflow-212] deps = -r build-requirements.txt 212: tensorflow>=2.12rc1,<2.13