From c15e3316bf120422b1150b1d3a9389ffe3519bfa Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Sat, 16 Aug 2025 12:46:54 +0000 Subject: [PATCH 01/29] sdks/python: properly make milvus as extra dependency --- sdks/python/setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index f7cd2a808a9a..382f7c263741 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -160,6 +160,8 @@ def cythonize(*args, **kwargs): 'pandas>=1.4.3,!=1.5.0,!=1.5.1,<2.3', ] +milvus_dependency = ['pymilvus>=2.5.10,<3.0.0'] + def find_by_ext(root_dir, ext): for root, _, files in os.walk(root_dir): @@ -444,9 +446,8 @@ def get_portability_package_data(): 'mysql-connector-python>=9.3.0', 'python-tds>=1.16.1', 'sqlalchemy-pytds>=1.0.2', - 'oracledb>=3.1.1', - 'milvus' - ], + 'oracledb>=3.1.1' + ] + milvus_dependency, 'gcp': [ 'cachetools>=3.1.0,<7', 'google-api-core>=2.0.0,<3', @@ -589,7 +590,7 @@ def get_portability_package_data(): ], 'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'], 'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'], - 'milvus': ['pymilvus>=2.5.10,<3.0.0'] + 'milvus': milvus_dependency }, zip_safe=False, # PyPI package information. 
From 316a41fd1453d8c120b0bc34b345e315a995fc5f Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Wed, 20 Aug 2025 11:32:13 +0000 Subject: [PATCH 02/29] sdks/python: update image requirements --- .../py310/base_image_requirements.txt | 44 +++++++-------- .../container/py310/ml_image_requirements.txt | 54 +++++++++---------- .../py311/base_image_requirements.txt | 44 +++++++-------- .../container/py311/ml_image_requirements.txt | 54 +++++++++---------- .../py312/base_image_requirements.txt | 44 +++++++-------- .../container/py312/ml_image_requirements.txt | 54 +++++++++---------- .../py313/base_image_requirements.txt | 42 +++++++-------- .../py39/base_image_requirements.txt | 44 +++++++-------- .../container/py39/ml_image_requirements.txt | 50 ++++++++--------- 9 files changed, 215 insertions(+), 215 deletions(-) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 58d76f711cd9..637620fd7542 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -37,12 +37,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -51,16 +51,16 @@ docstring_parser==0.17.0 exceptiongroup==1.3.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -freezegun==1.5.4 +fasteners==0.20 +freezegun==1.5.5 frozenlist==1.7.0 future==1.0.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -73,12 +73,12 @@ 
google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.4 @@ -92,29 +92,27 @@ hdfs==2.7.3 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 @@ -124,7 +122,7 @@ opentelemetry-api==1.36.0 opentelemetry-sdk==1.36.0 opentelemetry-semantic-conventions==0.57b0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -146,7 +144,8 @@ pydantic_core==2.33.2 pydot==1.4.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -155,13 +154,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rpds-py==0.27.0 rsa==4.9.1 @@ -175,7 +174,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 
tenacity==8.5.0 @@ -186,12 +185,13 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py310/ml_image_requirements.txt b/sdks/python/container/py310/ml_image_requirements.txt index 1b8c6e64b6e5..7d39fce68269 100644 --- a/sdks/python/container/py310/ml_image_requirements.txt +++ b/sdks/python/container/py310/ml_image_requirements.txt @@ -39,12 +39,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -53,20 +53,20 @@ docstring_parser==0.17.0 exceptiongroup==1.3.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -filelock==3.18.0 +fasteners==0.20 +filelock==3.19.1 flatbuffers==25.2.10 -freezegun==1.5.4 +freezegun==1.5.5 frozenlist==1.7.0 fsspec==2025.7.0 future==1.0.0 gast==0.6.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -79,12 +79,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-pasta==0.2.0 google-resumable-media==2.7.2 
googleapis-common-protos==1.70.0 @@ -97,41 +97,39 @@ guppy3==3.1.5 h11==0.16.0 h5py==3.14.0 hdfs==2.7.3 -hf-xet==1.1.7 +hf-xet==1.1.8 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 huggingface-hub==0.34.4 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 -keras==3.11.1 +keras==3.11.2 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 Markdown==3.8.2 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 MarkupSafe==3.0.2 mdurl==0.1.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 ml-dtypes==0.3.2 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.4.2 @@ -159,7 +157,7 @@ opentelemetry-semantic-conventions==0.57b0 opt_einsum==3.4.0 optree==0.17.0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -182,7 +180,8 @@ pydot==1.4.2 Pygments==2.19.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -191,13 +190,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rich==14.1.0 rpds-py==0.27.0 @@ -213,7 +212,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 sympy==1.14.0 @@ -230,18 +229,19 @@ tokenizers==0.21.4 tomli==2.2.1 torch==2.7.1 tqdm==4.67.1 -transformers==4.48.3 +transformers==4.55.2 triton==3.3.1 typing-inspection==0.4.1 
typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 Werkzeug==3.1.3 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 7d2c0bb4fa2c..6168168661a8 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -36,12 +36,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -49,16 +49,16 @@ docopt==0.6.2 docstring_parser==0.17.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -freezegun==1.5.4 +fasteners==0.20 +freezegun==1.5.5 frozenlist==1.7.0 future==1.0.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -71,12 +71,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.4 @@ -90,29 +90,27 @@ hdfs==2.7.3 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 
importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 @@ -122,7 +120,7 @@ opentelemetry-api==1.36.0 opentelemetry-sdk==1.36.0 opentelemetry-semantic-conventions==0.57b0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -144,7 +142,8 @@ pydantic_core==2.33.2 pydot==1.4.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -153,13 +152,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rpds-py==0.27.0 rsa==4.9.1 @@ -173,7 +172,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 tenacity==8.5.0 @@ -183,12 +182,13 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py311/ml_image_requirements.txt b/sdks/python/container/py311/ml_image_requirements.txt index 968771453ff7..2e9f1d892339 100644 --- a/sdks/python/container/py311/ml_image_requirements.txt +++ 
b/sdks/python/container/py311/ml_image_requirements.txt @@ -38,12 +38,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -51,20 +51,20 @@ docopt==0.6.2 docstring_parser==0.17.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -filelock==3.18.0 +fasteners==0.20 +filelock==3.19.1 flatbuffers==25.2.10 -freezegun==1.5.4 +freezegun==1.5.5 frozenlist==1.7.0 fsspec==2025.7.0 future==1.0.0 gast==0.6.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -77,12 +77,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-pasta==0.2.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 @@ -95,41 +95,39 @@ guppy3==3.1.5 h11==0.16.0 h5py==3.14.0 hdfs==2.7.3 -hf-xet==1.1.7 +hf-xet==1.1.8 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 huggingface-hub==0.34.4 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 -keras==3.11.1 +keras==3.11.2 
keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 Markdown==3.8.2 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 MarkupSafe==3.0.2 mdurl==0.1.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 ml-dtypes==0.3.2 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.5 @@ -157,7 +155,7 @@ opentelemetry-semantic-conventions==0.57b0 opt_einsum==3.4.0 optree==0.17.0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -180,7 +178,8 @@ pydot==1.4.2 Pygments==2.19.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -189,13 +188,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rich==14.1.0 rpds-py==0.27.0 @@ -211,7 +210,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 sympy==1.14.0 @@ -227,18 +226,19 @@ threadpoolctl==3.6.0 tokenizers==0.21.4 torch==2.7.1 tqdm==4.67.1 -transformers==4.48.3 +transformers==4.55.2 triton==3.3.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 Werkzeug==3.1.3 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py312/base_image_requirements.txt b/sdks/python/container/py312/base_image_requirements.txt index 3489ac327b93..116ca10f5c02 100644 --- a/sdks/python/container/py312/base_image_requirements.txt +++ b/sdks/python/container/py312/base_image_requirements.txt @@ -35,12 
+35,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -48,16 +48,16 @@ docopt==0.6.2 docstring_parser==0.17.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -freezegun==1.5.4 +fasteners==0.20 +freezegun==1.5.5 frozenlist==1.7.0 future==1.0.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -70,12 +70,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.4 @@ -89,29 +89,27 @@ hdfs==2.7.3 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 @@ -121,7 +119,7 @@ 
opentelemetry-api==1.36.0 opentelemetry-sdk==1.36.0 opentelemetry-semantic-conventions==0.57b0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -143,7 +141,8 @@ pydantic_core==2.33.2 pydot==1.4.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -152,13 +151,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rpds-py==0.27.0 rsa==4.9.1 @@ -172,7 +171,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 tenacity==8.5.0 @@ -182,12 +181,13 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py312/ml_image_requirements.txt b/sdks/python/container/py312/ml_image_requirements.txt index 85142ae3c935..54df0e0cafb8 100644 --- a/sdks/python/container/py312/ml_image_requirements.txt +++ b/sdks/python/container/py312/ml_image_requirements.txt @@ -37,12 +37,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -50,20 +50,20 @@ docopt==0.6.2 docstring_parser==0.17.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -filelock==3.18.0 +fasteners==0.20 +filelock==3.19.1 flatbuffers==25.2.10 -freezegun==1.5.4 
+freezegun==1.5.5 frozenlist==1.7.0 fsspec==2025.7.0 future==1.0.0 gast==0.6.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -76,12 +76,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-pasta==0.2.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 @@ -94,41 +94,39 @@ guppy3==3.1.5 h11==0.16.0 h5py==3.14.0 hdfs==2.7.3 -hf-xet==1.1.7 +hf-xet==1.1.8 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 huggingface-hub==0.34.4 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 -keras==3.11.1 +keras==3.11.2 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 Markdown==3.8.2 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 MarkupSafe==3.0.2 mdurl==0.1.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 ml-dtypes==0.3.2 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.5 @@ -156,7 +154,7 @@ opentelemetry-semantic-conventions==0.57b0 opt_einsum==3.4.0 optree==0.17.0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -179,7 
+177,8 @@ pydot==1.4.2 Pygments==2.19.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -188,13 +187,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rich==14.1.0 rpds-py==0.27.0 @@ -210,7 +209,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 sympy==1.14.0 @@ -225,18 +224,19 @@ threadpoolctl==3.6.0 tokenizers==0.21.4 torch==2.7.1 tqdm==4.67.1 -transformers==4.48.3 +transformers==4.55.2 triton==3.3.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 Werkzeug==3.1.3 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py313/base_image_requirements.txt b/sdks/python/container/py313/base_image_requirements.txt index 553318c54cca..c86c612f3847 100644 --- a/sdks/python/container/py313/base_image_requirements.txt +++ b/sdks/python/container/py313/base_image_requirements.txt @@ -35,12 +35,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -48,15 +48,15 @@ docopt==0.6.2 docstring_parser==0.17.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -freezegun==1.5.4 +fasteners==0.20 +freezegun==1.5.5 frozenlist==1.7.0 future==1.0.0 google-api-core==2.25.1 google-apitools==0.5.32 google-auth==2.40.3 
google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -68,12 +68,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.4 @@ -87,29 +87,27 @@ hdfs==2.7.3 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 @@ -119,7 +117,7 @@ opentelemetry-api==1.36.0 opentelemetry-sdk==1.36.0 opentelemetry-semantic-conventions==0.57b0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -141,7 +139,8 @@ pydantic_core==2.33.2 pydot==1.4.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.6.0 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -150,13 +149,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 
requests-mock==1.12.1 rpds-py==0.27.0 rsa==4.9.1 @@ -170,7 +169,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 tenacity==8.5.0 @@ -180,11 +179,12 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index cfcae926af66..42fb5c732c3d 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -37,12 +37,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.1.8 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -51,16 +51,16 @@ docstring_parser==0.17.0 exceptiongroup==1.3.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -freezegun==1.5.4 +fasteners==0.20 +freezegun==1.5.5 frozenlist==1.7.0 future==1.0.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -73,12 +73,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 
-google-genai==1.29.0 +google-genai==1.31.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 greenlet==3.2.4 @@ -92,29 +92,27 @@ hdfs==2.7.3 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.0.2 @@ -124,7 +122,7 @@ opentelemetry-api==1.36.0 opentelemetry-sdk==1.36.0 opentelemetry-semantic-conventions==0.57b0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -146,7 +144,8 @@ pydantic_core==2.33.2 pydot==1.4.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -155,13 +154,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rpds-py==0.27.0 rsa==4.9.1 @@ -175,7 +174,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 tenacity==8.5.0 @@ -186,12 +185,13 @@ tqdm==4.67.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 
diff --git a/sdks/python/container/py39/ml_image_requirements.txt b/sdks/python/container/py39/ml_image_requirements.txt index 7ae49f5d024a..f03ef441c259 100644 --- a/sdks/python/container/py39/ml_image_requirements.txt +++ b/sdks/python/container/py39/ml_image_requirements.txt @@ -39,12 +39,12 @@ build==1.3.0 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.1.8 -cloud-sql-python-connector==1.18.3 +cloud-sql-python-connector==1.18.4 crcmod==1.7 cryptography==45.0.6 -Cython==3.1.2 +Cython==3.1.3 dill==0.3.1.1 dnspython==2.7.0 docker==7.1.0 @@ -53,20 +53,20 @@ docstring_parser==0.17.0 exceptiongroup==1.3.0 execnet==2.1.1 fastavro==1.12.0 -fasteners==0.19 -filelock==3.18.0 +fasteners==0.20 +filelock==3.19.1 flatbuffers==25.2.10 -freezegun==1.5.4 +freezegun==1.5.5 frozenlist==1.7.0 fsspec==2025.7.0 future==1.0.0 gast==0.6.0 google-api-core==2.25.1 -google-api-python-client==2.178.0 +google-api-python-client==2.179.0 google-apitools==0.5.31 google-auth==2.40.3 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.108.0 +google-cloud-aiplatform==1.110.0 google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-bigtable==2.32.0 @@ -79,12 +79,12 @@ google-cloud-pubsub==2.31.1 google-cloud-pubsublite==1.12.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.14.2 -google-cloud-spanner==3.56.0 +google-cloud-spanner==3.57.0 google-cloud-storage==2.19.0 google-cloud-videointelligence==2.16.2 google-cloud-vision==3.10.2 google-crc32c==1.7.1 -google-genai==1.29.0 +google-genai==1.31.0 google-pasta==0.2.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 @@ -97,23 +97,23 @@ guppy3==3.1.5 h11==0.16.0 h5py==3.14.0 hdfs==2.7.3 -hf-xet==1.1.7 +hf-xet==1.1.8 httpcore==1.0.9 httplib2==0.22.0 httpx==0.28.1 huggingface-hub==0.34.4 -hypothesis==6.137.1 +hypothesis==6.138.2 idna==3.10 importlib_metadata==8.7.0 iniconfig==2.1.0 jaraco.classes==3.4.0 
jaraco.context==6.0.1 -jaraco.functools==4.2.1 +jaraco.functools==4.3.0 jeepney==0.9.0 Jinja2==3.1.6 joblib==1.5.1 jsonpickle==3.4.2 -jsonschema==4.25.0 +jsonschema==4.25.1 jsonschema-specifications==2025.4.1 keras==3.10.0 keyring==25.6.0 @@ -123,15 +123,13 @@ Markdown==3.8.2 markdown-it-py==3.0.0 MarkupSafe==3.0.2 mdurl==0.1.2 -ujson==5.8.0 milvus-lite==2.5.1 -pymilvus==2.5.10 ml-dtypes==0.3.2 mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 -multidict==6.6.3 +multidict==6.6.4 mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.2.1 @@ -159,7 +157,7 @@ opentelemetry-semantic-conventions==0.57b0 opt_einsum==3.4.0 optree==0.17.0 oracledb==3.3.0 -orjson==3.11.1 +orjson==3.11.2 overrides==7.7.0 packaging==25.0 pandas==2.2.3 @@ -182,7 +180,8 @@ pydot==1.4.2 Pygments==2.19.2 PyHamcrest==2.1.0 PyJWT==2.10.1 -pymongo==4.14.0 +pymilvus==2.5.14 +pymongo==4.14.1 PyMySQL==1.1.1 pyparsing==3.2.3 pyproject_hooks==1.2.0 @@ -191,13 +190,13 @@ pytest-timeout==2.4.0 pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-tds==1.16.1 +python-tds==1.17.0 pytz==2025.2 PyYAML==6.0.2 redis==5.3.1 referencing==0.36.2 regex==2025.7.34 -requests==2.32.4 +requests==2.32.5 requests-mock==1.12.1 rich==14.1.0 rpds-py==0.27.0 @@ -213,7 +212,7 @@ six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 soupsieve==2.7 -SQLAlchemy==2.0.42 +SQLAlchemy==2.0.43 sqlalchemy_pytds==1.0.2 sqlparse==0.5.3 sympy==1.14.0 @@ -230,18 +229,19 @@ tokenizers==0.21.4 tomli==2.2.1 torch==2.7.1 tqdm==4.67.1 -transformers==4.48.3 +transformers==4.55.2 triton==3.3.1 typing-inspection==0.4.1 typing_extensions==4.14.1 tzdata==2025.2 +ujson==5.10.0 uritemplate==4.2.0 urllib3==2.5.0 virtualenv-clone==0.5.7 websockets==15.0.1 Werkzeug==3.1.3 wheel==0.45.1 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 -zstandard==0.23.0 +zstandard==0.24.0 From 3ca5394a5d5e6dfaaa0bcd8f2b9591bba526834c Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Wed, 20 Aug 2025 11:32:45 +0000 Subject: [PATCH 
03/29] .github: trigger postcommit python --- .github/trigger_files/beam_PostCommit_Python.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index c6ec17f48412..4db5f0675328 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 35 + "modification": 37 } From 21262a62feaa20203391f6d19c474b99d852b76e Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Sat, 16 Aug 2025 14:00:42 +0000 Subject: [PATCH 04/29] sdks/python: fix linting issues --- sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py index 58df738c6e5f..6782472d9611 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py @@ -27,16 +27,15 @@ from typing import Union from google.protobuf.json_format import MessageToDict - -from apache_beam.ml.rag.types import Chunk -from apache_beam.ml.rag.types import Embedding -from apache_beam.transforms.enrichment import EnrichmentSourceHandler from pymilvus import AnnSearchRequest from pymilvus import Hit from pymilvus import Hits from pymilvus import MilvusClient from pymilvus import SearchResult +from apache_beam.ml.rag.types import Chunk +from apache_beam.ml.rag.types import Embedding +from apache_beam.transforms.enrichment import EnrichmentSourceHandler class SearchStrategy(Enum): """Search strategies for information retrieval. 
From 1655458407b504d464b307329ba9cdb39450ba90 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Sat, 16 Aug 2025 14:24:08 +0000 Subject: [PATCH 05/29] sdks/python: fix formatting issues --- sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py index 6782472d9611..a0f597f5366f 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py @@ -37,6 +37,7 @@ from apache_beam.ml.rag.types import Embedding from apache_beam.transforms.enrichment import EnrichmentSourceHandler + class SearchStrategy(Enum): """Search strategies for information retrieval. From 6ed396d21e223e7afea185d3f52886706dc14010 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Wed, 20 Aug 2025 15:21:22 +0000 Subject: [PATCH 06/29] .github: trigger beam postcommit python --- .github/trigger_files/beam_PostCommit_Python.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 4db5f0675328..00e0c3c25433 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 37 + "modification": 27 } From 6bcb2149aedeaec833f0cd7d49db18fad91f7cee Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 25 Aug 2025 11:02:17 +0000 Subject: [PATCH 07/29] sdks/python: revert milvus version in itests --- .../apache_beam/ml/rag/enrichment/milvus_search_it_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py index 
eed02bb49575..4dabcafe6703 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py @@ -481,7 +481,7 @@ class TestMilvusSearchEnrichment(unittest.TestCase): """Tests for search functionality across all search strategies""" _db: MilvusDBContainerInfo - _version = "milvusdb/milvus:v2.3.9" + _version = "milvusdb/milvus:v2.5.10" @classmethod def setUpClass(cls): From 1e9f9fa78992e6aae2ce8eb2f5c236e06b99b830 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 25 Aug 2025 13:13:47 +0000 Subject: [PATCH 08/29] sdks/python: update image requirements --- .../py310/base_image_requirements.txt | 197 ++++++++++++++ .../container/py310/ml_image_requirements.txt | 247 ++++++++++++++++++ .../py311/base_image_requirements.txt | 194 ++++++++++++++ .../container/py311/ml_image_requirements.txt | 244 +++++++++++++++++ .../py312/base_image_requirements.txt | 193 ++++++++++++++ .../container/py312/ml_image_requirements.txt | 242 +++++++++++++++++ .../py313/base_image_requirements.txt | 190 ++++++++++++++ .../py39/base_image_requirements.txt | 197 ++++++++++++++ .../container/py39/ml_image_requirements.txt | 247 ++++++++++++++++++ 9 files changed, 1951 insertions(+) create mode 100644 sdks/python/container/py310/base_image_requirements.txt create mode 100644 sdks/python/container/py310/ml_image_requirements.txt create mode 100644 sdks/python/container/py311/base_image_requirements.txt create mode 100644 sdks/python/container/py311/ml_image_requirements.txt create mode 100644 sdks/python/container/py312/base_image_requirements.txt create mode 100644 sdks/python/container/py312/ml_image_requirements.txt create mode 100644 sdks/python/container/py313/base_image_requirements.txt create mode 100644 sdks/python/container/py39/base_image_requirements.txt create mode 100644 sdks/python/container/py39/ml_image_requirements.txt diff --git a/sdks/python/container/py310/base_image_requirements.txt 
b/sdks/python/container/py310/base_image_requirements.txt new file mode 100644 index 000000000000..7fafc0ee1b4f --- /dev/null +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -0,0 +1,197 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py310 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +async-timeout==5.0.1 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +exceptiongroup==1.3.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +freezegun==1.5.5 +frozenlist==1.7.0 +future==1.0.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.65.5 +guppy3==3.1.5 +h11==0.16.0 +hdfs==2.7.3 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +MarkupSafe==3.0.2 
+milvus-lite==2.5.1 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +nltk==3.9.1 +numpy==2.2.6 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rpds-py==0.27.0 +rsa==4.9.1 +scikit-learn==1.7.1 +scipy==1.15.3 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +tenacity==8.5.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tomli==2.2.1 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py310/ml_image_requirements.txt b/sdks/python/container/py310/ml_image_requirements.txt new file mode 100644 index 000000000000..87886d6844cd --- /dev/null +++ b/sdks/python/container/py310/ml_image_requirements.txt @@ -0,0 +1,247 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor 
license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py310 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +absl-py==2.3.1 +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +astunparse==1.6.3 +async-timeout==5.0.1 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +exceptiongroup==1.3.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +filelock==3.19.1 +flatbuffers==25.2.10 +freezegun==1.5.5 +frozenlist==1.7.0 +fsspec==2025.7.0 +future==1.0.0 +gast==0.6.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-pasta==0.2.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.63.0rc1 +guppy3==3.1.5 +h11==0.16.0 +h5py==3.14.0 +hdfs==2.7.3 +hf-xet==1.1.8 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +huggingface-hub==0.34.4 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 
+Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keras==3.11.3 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +libclang==18.1.1 +Markdown==3.8.2 +markdown-it-py==4.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +milvus-lite==2.5.1 +ml-dtypes==0.3.2 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +namex==0.1.0 +networkx==3.4.2 +nltk==3.9.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.6.4.1 +nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cudnn-cu12==9.5.1.17 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cufile-cu12==1.11.1.6 +nvidia-curand-cu12==10.3.7.77 +nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparselt-cu12==0.6.3 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvtx-cu12==12.6.77 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opt_einsum==3.4.0 +optree==0.17.0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==4.25.8 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +Pygments==2.19.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rich==14.1.0 +rpds-py==0.27.0 +rsa==4.9.1 +safetensors==0.6.2 +scikit-learn==1.7.1 +scipy==1.15.3 +scramp==1.4.6 +SecretStorage==3.3.3 
+setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +sympy==1.14.0 +tenacity==8.5.0 +tensorboard==2.16.2 +tensorboard-data-server==0.7.2 +tensorflow==2.16.2 +tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64" +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==3.1.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tokenizers==0.21.4 +tomli==2.2.1 +torch==2.7.1 +tqdm==4.67.1 +transformers==4.55.4 +triton==3.3.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +Werkzeug==3.1.3 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt new file mode 100644 index 000000000000..e682ee82f0a7 --- /dev/null +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -0,0 +1,194 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py311 container image. 
+# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. + +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +freezegun==1.5.5 +frozenlist==1.7.0 +future==1.0.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.65.5 +guppy3==3.1.5 +h11==0.16.0 +hdfs==2.7.3 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 
+hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +MarkupSafe==3.0.2 +milvus-lite==2.5.1 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +nltk==3.9.1 +numpy==2.2.6 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rpds-py==0.27.0 +rsa==4.9.1 +scikit-learn==1.7.1 +scipy==1.16.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +tenacity==8.5.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git 
a/sdks/python/container/py311/ml_image_requirements.txt b/sdks/python/container/py311/ml_image_requirements.txt new file mode 100644 index 000000000000..9bfac83ba001 --- /dev/null +++ b/sdks/python/container/py311/ml_image_requirements.txt @@ -0,0 +1,244 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py311 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +absl-py==2.3.1 +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +astunparse==1.6.3 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +filelock==3.19.1 +flatbuffers==25.2.10 +freezegun==1.5.5 +frozenlist==1.7.0 +fsspec==2025.7.0 +future==1.0.0 +gast==0.6.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-pasta==0.2.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.63.0rc1 +guppy3==3.1.5 +h11==0.16.0 +h5py==3.14.0 +hdfs==2.7.3 +hf-xet==1.1.8 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +huggingface-hub==0.34.4 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 
+jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keras==3.11.3 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +libclang==18.1.1 +Markdown==3.8.2 +markdown-it-py==4.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +milvus-lite==2.5.1 +ml-dtypes==0.3.2 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +namex==0.1.0 +networkx==3.5 +nltk==3.9.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.6.4.1 +nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cudnn-cu12==9.5.1.17 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cufile-cu12==1.11.1.6 +nvidia-curand-cu12==10.3.7.77 +nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparselt-cu12==0.6.3 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvtx-cu12==12.6.77 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opt_einsum==3.4.0 +optree==0.17.0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==4.25.8 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +Pygments==2.19.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rich==14.1.0 +rpds-py==0.27.0 +rsa==4.9.1 +safetensors==0.6.2 +scikit-learn==1.7.1 +scipy==1.16.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 
+sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +sympy==1.14.0 +tenacity==8.5.0 +tensorboard==2.16.2 +tensorboard-data-server==0.7.2 +tensorflow==2.16.2 +tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64" +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==3.1.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tokenizers==0.21.4 +torch==2.7.1 +tqdm==4.67.1 +transformers==4.55.4 +triton==3.3.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +Werkzeug==3.1.3 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py312/base_image_requirements.txt b/sdks/python/container/py312/base_image_requirements.txt new file mode 100644 index 000000000000..c031e4f29f59 --- /dev/null +++ b/sdks/python/container/py312/base_image_requirements.txt @@ -0,0 +1,193 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py312 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. 
+# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. + +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +attrs==25.3.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +freezegun==1.5.5 +frozenlist==1.7.0 +future==1.0.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.65.5 +guppy3==3.1.5 +h11==0.16.0 +hdfs==2.7.3 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 
+jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +MarkupSafe==3.0.2 +milvus-lite==2.5.1 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +nltk==3.9.1 +numpy==2.2.6 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rpds-py==0.27.0 +rsa==4.9.1 +scikit-learn==1.7.1 +scipy==1.16.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +tenacity==8.5.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py312/ml_image_requirements.txt b/sdks/python/container/py312/ml_image_requirements.txt new file mode 100644 
index 000000000000..0b627d0cf7c7 --- /dev/null +++ b/sdks/python/container/py312/ml_image_requirements.txt @@ -0,0 +1,242 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py312 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +absl-py==2.3.1 +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +astunparse==1.6.3 +attrs==25.3.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +filelock==3.19.1 +flatbuffers==25.2.10 +freezegun==1.5.5 +frozenlist==1.7.0 +fsspec==2025.7.0 +future==1.0.0 +gast==0.6.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-pasta==0.2.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.63.0rc1 +guppy3==3.1.5 +h11==0.16.0 +h5py==3.14.0 +hdfs==2.7.3 +hf-xet==1.1.8 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +huggingface-hub==0.34.4 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 
+jsonschema-specifications==2025.4.1 +keras==3.11.3 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +libclang==18.1.1 +Markdown==3.8.2 +markdown-it-py==4.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +milvus-lite==2.5.1 +ml-dtypes==0.3.2 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +namex==0.1.0 +networkx==3.5 +nltk==3.9.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.6.4.1 +nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cudnn-cu12==9.5.1.17 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cufile-cu12==1.11.1.6 +nvidia-curand-cu12==10.3.7.77 +nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparselt-cu12==0.6.3 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvtx-cu12==12.6.77 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opt_einsum==3.4.0 +optree==0.17.0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==4.25.8 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +Pygments==2.19.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rich==14.1.0 +rpds-py==0.27.0 +rsa==4.9.1 +safetensors==0.6.2 +scikit-learn==1.7.1 +scipy==1.16.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 
+sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +sympy==1.14.0 +tenacity==8.5.0 +tensorboard==2.16.2 +tensorboard-data-server==0.7.2 +tensorflow==2.16.2 +tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64" +termcolor==3.1.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tokenizers==0.21.4 +torch==2.7.1 +tqdm==4.67.1 +transformers==4.55.4 +triton==3.3.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +Werkzeug==3.1.3 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py313/base_image_requirements.txt b/sdks/python/container/py313/base_image_requirements.txt new file mode 100644 index 000000000000..7da02431ff41 --- /dev/null +++ b/sdks/python/container/py313/base_image_requirements.txt @@ -0,0 +1,190 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py313 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. 
+# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. + +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +attrs==25.3.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.2.1 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +freezegun==1.5.5 +frozenlist==1.7.0 +future==1.0.0 +google-api-core==2.25.1 +google-apitools==0.5.32 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.74.0 +grpcio-status==1.71.2 +guppy3==3.1.5 +h11==0.16.0 +hdfs==2.7.3 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 
+joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +MarkupSafe==3.0.2 +milvus-lite==2.5.1 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +nltk==3.9.1 +numpy==2.2.6 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +psycopg2-binary==2.9.10 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.6.0 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rpds-py==0.27.0 +rsa==4.9.1 +scikit-learn==1.7.1 +scipy==1.16.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +tenacity==8.5.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt new file mode 100644 index 000000000000..7ef99a7a047d --- /dev/null +++ 
b/sdks/python/container/py39/base_image_requirements.txt @@ -0,0 +1,197 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py39 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +async-timeout==5.0.1 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.1.8 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +exceptiongroup==1.3.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +freezegun==1.5.5 +frozenlist==1.7.0 +future==1.0.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.65.5 +guppy3==3.1.5 +h11==0.16.0 +hdfs==2.7.3 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +MarkupSafe==3.0.2 
+milvus-lite==2.5.1 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +nltk==3.9.1 +numpy==2.0.2 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==5.29.5 +psycopg2-binary==2.9.9 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rpds-py==0.27.0 +rsa==4.9.1 +scikit-learn==1.6.1 +scipy==1.13.1 +scramp==1.4.6 +SecretStorage==3.3.3 +setuptools==80.9.0 +shapely==2.0.7 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +tenacity==8.5.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tomli==2.2.1 +tqdm==4.67.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 diff --git a/sdks/python/container/py39/ml_image_requirements.txt b/sdks/python/container/py39/ml_image_requirements.txt new file mode 100644 index 000000000000..5aeac46b6551 --- /dev/null +++ b/sdks/python/container/py39/ml_image_requirements.txt @@ -0,0 +1,247 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license 
agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Autogenerated requirements file for Apache Beam py39 container image. +# Run ./gradlew :sdks:python:container:generatePythonRequirementsAll to update. +# Do not edit manually, adjust ../base_image_requirements_manual.txt or +# Apache Beam's setup.py instead, and regenerate the list. +# You will need Python interpreters for all versions supported by Beam, see: +# https://s.apache.org/beam-python-dev-wiki +# Reach out to a committer if you need help. 
+ +absl-py==2.3.1 +aiofiles==24.1.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 +annotated-types==0.7.0 +anyio==4.10.0 +asn1crypto==1.5.1 +astunparse==1.6.3 +async-timeout==5.0.1 +attrs==25.3.0 +backports.tarfile==1.2.0 +beautifulsoup4==4.13.5 +bs4==0.0.2 +build==1.3.0 +cachetools==5.5.2 +certifi==2025.8.3 +cffi==1.17.1 +charset-normalizer==3.4.3 +click==8.1.8 +cloud-sql-python-connector==1.18.4 +crcmod==1.7 +cryptography==45.0.6 +Cython==3.1.3 +dill==0.3.1.1 +dnspython==2.7.0 +docker==7.1.0 +docopt==0.6.2 +docstring_parser==0.17.0 +exceptiongroup==1.3.0 +execnet==2.1.1 +fastavro==1.12.0 +fasteners==0.20 +filelock==3.19.1 +flatbuffers==25.2.10 +freezegun==1.5.5 +frozenlist==1.7.0 +fsspec==2025.7.0 +future==1.0.0 +gast==0.6.0 +google-api-core==2.25.1 +google-api-python-client==2.179.0 +google-apitools==0.5.31 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-cloud-aiplatform==1.110.0 +google-cloud-bigquery==3.36.0 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigtable==2.32.0 +google-cloud-core==2.4.3 +google-cloud-datastore==2.21.0 +google-cloud-dlp==3.31.0 +google-cloud-language==2.17.2 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.31.1 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.18 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.57.0 +google-cloud-storage==2.19.0 +google-cloud-videointelligence==2.16.2 +google-cloud-vision==3.10.2 +google-crc32c==1.7.1 +google-genai==1.31.0 +google-pasta==0.2.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +grpc-google-iam-v1==0.14.2 +grpc-interceptor==0.15.4 +grpcio==1.65.5 +grpcio-status==1.63.0rc1 +guppy3==3.1.5 +h11==0.16.0 +h5py==3.14.0 +hdfs==2.7.3 +hf-xet==1.1.8 +httpcore==1.0.9 +httplib2==0.22.0 +httpx==0.28.1 +huggingface-hub==0.34.4 +hypothesis==6.138.3 +idna==3.10 +importlib_metadata==8.7.0 +iniconfig==2.1.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 
+Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==3.4.2 +jsonschema==4.25.1 +jsonschema-specifications==2025.4.1 +keras==3.10.0 +keyring==25.6.0 +keyrings.google-artifactregistry-auth==1.1.2 +libclang==18.1.1 +Markdown==3.8.2 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +milvus-lite==2.5.1 +ml-dtypes==0.3.2 +mmh3==5.2.0 +mock==5.2.0 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.6.4 +mysql-connector-python==9.4.0 +namex==0.1.0 +networkx==3.2.1 +nltk==3.9.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.6.4.1 +nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cudnn-cu12==9.5.1.17 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cufile-cu12==1.11.1.6 +nvidia-curand-cu12==10.3.7.77 +nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparselt-cu12==0.6.3 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvtx-cu12==12.6.77 +oauth2client==4.1.3 +objsize==0.7.1 +opentelemetry-api==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opt_einsum==3.4.0 +optree==0.17.0 +oracledb==3.3.0 +orjson==3.11.2 +overrides==7.7.0 +packaging==25.0 +pandas==2.2.3 +parameterized==0.9.0 +pg8000==1.31.4 +pip==25.2 +pluggy==1.6.0 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==4.25.8 +psycopg2-binary==2.9.9 +pyarrow==18.1.0 +pyarrow-hotfix==0.7 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydot==1.4.2 +Pygments==2.19.2 +PyHamcrest==2.1.0 +PyJWT==2.10.1 +pymilvus==2.5.15 +pymongo==4.14.1 +PyMySQL==1.1.2 +pyparsing==3.2.3 +pyproject_hooks==1.2.0 +pytest==7.4.4 +pytest-timeout==2.4.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-tds==1.17.0 +pytz==2025.2 +PyYAML==6.0.2 +redis==5.3.1 +referencing==0.36.2 +regex==2025.7.34 +requests==2.32.5 +requests-mock==1.12.1 +rich==14.1.0 +rpds-py==0.27.0 +rsa==4.9.1 +safetensors==0.6.2 +scikit-learn==1.6.1 +scipy==1.13.1 +scramp==1.4.6 +SecretStorage==3.3.3 
+setuptools==80.9.0 +shapely==2.0.7 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soupsieve==2.7 +SQLAlchemy==2.0.43 +sqlalchemy_pytds==1.0.2 +sqlparse==0.5.3 +sympy==1.14.0 +tenacity==8.5.0 +tensorboard==2.16.2 +tensorboard-data-server==0.7.2 +tensorflow==2.16.2 +tensorflow-cpu-aws==2.16.2;platform_machine=="aarch64" +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==3.1.0 +testcontainers==4.12.0 +threadpoolctl==3.6.0 +tokenizers==0.21.4 +tomli==2.2.1 +torch==2.7.1 +tqdm==4.67.1 +transformers==4.55.4 +triton==3.3.1 +typing-inspection==0.4.1 +typing_extensions==4.14.1 +tzdata==2025.2 +ujson==5.11.0 +uritemplate==4.2.0 +urllib3==2.5.0 +virtualenv-clone==0.5.7 +websockets==15.0.1 +Werkzeug==3.1.3 +wheel==0.45.1 +wrapt==1.17.3 +yarl==1.20.1 +zipp==3.23.0 +zstandard==0.24.0 From 4da18cfd42bc39f71afc258c0b2d0f15d9e4af52 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 25 Aug 2025 13:14:22 +0000 Subject: [PATCH 09/29] trigger_files: trigger postcommit python --- .github/trigger_files/beam_PostCommit_Python.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 5dfe9767be94..00e0c3c25433 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 309 + "modification": 27 } From 0cee2e0cba845aeb0042be2068450db655f39130 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:18:06 -0400 Subject: [PATCH 10/29] Bump github.com/docker/go-connections from 0.5.0 to 0.6.0 in /sdks (#35906) Bumps [github.com/docker/go-connections](https://github.com/docker/go-connections) from 0.5.0 to 0.6.0. 
- [Commits](https://github.com/docker/go-connections/compare/v0.5.0...v0.6.0) --- updated-dependencies: - dependency-name: github.com/docker/go-connections dependency-version: 0.6.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- sdks/go.mod | 2 +- sdks/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index df83d0f31e7c..f0599fe9b1f3 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -38,7 +38,7 @@ require ( github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.18.5 github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 github.com/aws/smithy-go v1.22.5 - github.com/docker/go-connections v0.5.0 + github.com/docker/go-connections v0.6.0 github.com/dustin/go-humanize v1.0.1 github.com/go-sql-driver/mysql v1.9.3 github.com/google/go-cmp v0.7.0 diff --git a/sdks/go.sum b/sdks/go.sum index 4d649d549be5..352b81ae25b1 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -895,8 +895,8 @@ github.com/dnaeon/go-vcr v1.1.0/go.mod h1:M7tiix8f0r6mKKJ3Yq/kqU1OYf3MnfmBWVbPx/ github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI= github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= -github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= +github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= From bba4f936667a6ee66e71248f46719a38348a88fe Mon Sep 17 00:00:00 2001 From: Chamikara Jayalath Date: Tue, 26 Aug 2025 14:05:54 -0700 Subject: [PATCH 11/29] Add the readme link to new YAML examples (#35941) --- sdks/python/apache_beam/yaml/examples/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/yaml/examples/README.md b/sdks/python/apache_beam/yaml/examples/README.md index 4cba973dbead..8c6356f68a38 100644 --- a/sdks/python/apache_beam/yaml/examples/README.md +++ b/sdks/python/apache_beam/yaml/examples/README.md @@ -258,9 +258,9 @@ ML enrichments: Examples that include ML-specific transforms such as `RunInference` and `MLTransform`: -- [streaming_sentiment_analysis.yaml](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/sentiment_analysis/streaming_sentiment_analysis.yaml) -- [streaming_taxifare_prediction.yaml](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/taxi_fare/streaming_taxifare_prediction.yaml) -- [batch_log_analysis.yaml](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/log_analysis/batch_log_analysis.yaml) +- Streaming Sentiment Analysis ([documentation](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/yaml/examples/transforms/ml/sentiment_analysis)) ([pipeline](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/sentiment_analysis/streaming_sentiment_analysis.yaml)) +- Streaming Taxi Fare Prediction ([documentation](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/yaml/examples/transforms/ml/taxi_fare)) ([pipeline](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/taxi_fare/streaming_taxifare_prediction.yaml)) +- Batch 
Log Analysis ML Workflow ([documentation](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/yaml/examples/transforms/ml/log_analysis)) ([pipeline](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/ml/log_analysis/batch_log_analysis.yaml)) More information can be found about aggregation transforms [here](https://beam.apache.org/documentation/sdks/yaml-combine/). From fa5f7d13adbf304ccd9631aa60c94a10c470a3d2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 19:44:52 -0400 Subject: [PATCH 12/29] Bump google.golang.org/api from 0.247.0 to 0.248.0 in /sdks (#35969) --- sdks/go.mod | 6 +++--- sdks/go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index f0599fe9b1f3..d9b3791ffe16 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -60,7 +60,7 @@ require ( golang.org/x/sync v0.16.0 golang.org/x/sys v0.35.0 golang.org/x/text v0.28.0 - google.golang.org/api v0.247.0 + google.golang.org/api v0.248.0 google.golang.org/genproto v0.0.0-20250603155806-513f23925822 google.golang.org/grpc v1.74.2 google.golang.org/protobuf v1.36.8 @@ -77,7 +77,7 @@ require ( require ( cel.dev/expr v0.24.0 // indirect - cloud.google.com/go/auth v0.16.4 // indirect + cloud.google.com/go/auth v0.16.5 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/monitoring v1.24.2 // indirect cloud.google.com/go/pubsub/v2 v2.0.0 // indirect @@ -206,5 +206,5 @@ require ( golang.org/x/tools v0.35.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index 
352b81ae25b1..d6f950f7ceba 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -103,8 +103,8 @@ cloud.google.com/go/assuredworkloads v1.7.0/go.mod h1:z/736/oNmtGAyU47reJgGN+KVo cloud.google.com/go/assuredworkloads v1.8.0/go.mod h1:AsX2cqyNCOvEQC8RMPnoc0yEarXQk6WEKkxYfL6kGIo= cloud.google.com/go/assuredworkloads v1.9.0/go.mod h1:kFuI1P78bplYtT77Tb1hi0FMxM0vVpRC7VVoJC3ZoT0= cloud.google.com/go/assuredworkloads v1.10.0/go.mod h1:kwdUQuXcedVdsIaKgKTp9t0UJkE5+PAVNhdQm4ZVq2E= -cloud.google.com/go/auth v0.16.4 h1:fXOAIQmkApVvcIn7Pc2+5J8QTMVbUGLscnSVNl11su8= -cloud.google.com/go/auth v0.16.4/go.mod h1:j10ncYwjX/g3cdX7GpEzsdM+d+ZNsXAbb6qXA7p1Y5M= +cloud.google.com/go/auth v0.16.5 h1:mFWNQ2FEVWAliEQWpAdH80omXFokmrnbDhUS9cBywsI= +cloud.google.com/go/auth v0.16.5/go.mod h1:utzRfHMP+Vv0mpOkTRQoWD2q3BatTOoWbA7gCc2dUhQ= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/automl v1.5.0/go.mod h1:34EjfoFGMZ5sgJ9EoLsRtdPSNZLcfflJR39VbVNS2M0= @@ -2053,8 +2053,8 @@ google.golang.org/api v0.108.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/ google.golang.org/api v0.110.0/go.mod h1:7FC4Vvx1Mooxh8C5HWjzZHcavuS2f6pmJpZx60ca7iI= google.golang.org/api v0.111.0/go.mod h1:qtFHvU9mhgTJegR31csQ+rwxyUTHOKFqCKWp1J0fdw0= google.golang.org/api v0.114.0/go.mod h1:ifYI2ZsFK6/uGddGfAD5BMxlnkBqCmqHSDUVi45N5Yg= -google.golang.org/api v0.247.0 h1:tSd/e0QrUlLsrwMKmkbQhYVa109qIintOls2Wh6bngc= -google.golang.org/api v0.247.0/go.mod h1:r1qZOPmxXffXg6xS5uhx16Fa/UFY8QU/K4bfKrnvovM= +google.golang.org/api v0.248.0 h1:hUotakSkcwGdYUqzCRc5yGYsg4wXxpkKlW5ryVqvC1Y= +google.golang.org/api v0.248.0/go.mod h1:yAFUAF56Li7IuIQbTFoLwXTCI6XCFKueOlS7S9e4F9k= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine 
v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -2217,8 +2217,8 @@ google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuO google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074 h1:mVXdvnmR3S3BQOqHECm9NGMjYiRtEvDYcqAqedTXY6s= google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074/go.mod h1:vYFwMYFbmA8vl6Z/krj/h7+U/AqpHknwJX4Uqgfyc7I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b h1:zPKJod4w6F1+nRGDI9ubnXYhU9NSWoFAijkHkUXeTK8= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c h1:qXWI/sQtv5UKboZ/zUk7h+mrf/lXORyI+n9DKDAusdg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= From 4c5cadad5a7924cd1c19f3499b7724669acc8da7 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Tue, 26 Aug 2025 20:41:32 -0400 Subject: [PATCH 13/29] Remove mysql-connector-python dependency (#35932) --- .../apache_beam/yaml/integration_tests.py | 33 ++++++++----------- .../ml/py310/base_image_requirements.txt | 1 - .../ml/py311/base_image_requirements.txt | 1 - .../ml/py312/base_image_requirements.txt | 1 - .../ml/py39/base_image_requirements.txt | 1 - .../py310/base_image_requirements.txt | 1 - .../py311/base_image_requirements.txt | 1 - .../py312/base_image_requirements.txt | 1 - .../py313/base_image_requirements.txt | 1 - .../py39/base_image_requirements.txt | 1 - 
sdks/python/setup.py | 1 - 11 files changed, 14 insertions(+), 29 deletions(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index 38fa2689268e..733dd10d0286 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -33,7 +33,6 @@ from datetime import timezone import mock -import mysql.connector import psycopg2 import pytds import sqlalchemy @@ -286,26 +285,22 @@ def temp_mysql_database(): Exception: Any other exception encountered during the setup process. """ with MySqlContainer(init=True, dialect='pymysql') as mysql_container: - try: - # Make connection to temp database and create tmp table - engine = sqlalchemy.create_engine(mysql_container.get_connection_url()) - with engine.begin() as connection: - connection.execute( - sqlalchemy.text( - "CREATE TABLE tmp_table (value INTEGER, `rank` INTEGER);")) + # Make connection to temp database and create tmp table + engine = sqlalchemy.create_engine(mysql_container.get_connection_url()) + with engine.begin() as connection: + connection.execute( + sqlalchemy.text( + "CREATE TABLE tmp_table (value INTEGER, `rank` INTEGER);")) - # Construct the JDBC url for connections later on by tests - jdbc_url = ( - f"jdbc:mysql://{mysql_container.get_container_host_ip()}:" - f"{mysql_container.get_exposed_port(mysql_container.port)}/" - f"{mysql_container.dbname}?" - f"user={mysql_container.username}&" - f"password={mysql_container.password}") + # Construct the JDBC url for connections later on by tests + jdbc_url = ( + f"jdbc:mysql://{mysql_container.get_container_host_ip()}:" + f"{mysql_container.get_exposed_port(mysql_container.port)}/" + f"{mysql_container.dbname}?" 
+ f"user={mysql_container.username}&" + f"password={mysql_container.password}") - yield jdbc_url - except mysql.connector.Error as err: - logging.error("Error interacting with temporary MySQL DB: %s", err) - raise err + yield jdbc_url @contextlib.contextmanager diff --git a/sdks/python/container/ml/py310/base_image_requirements.txt b/sdks/python/container/ml/py310/base_image_requirements.txt index ba2ee0d85340..2b067d2678e3 100644 --- a/sdks/python/container/ml/py310/base_image_requirements.txt +++ b/sdks/python/container/ml/py310/base_image_requirements.txt @@ -133,7 +133,6 @@ mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 multidict==6.6.4 -mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.4.2 nltk==3.9.1 diff --git a/sdks/python/container/ml/py311/base_image_requirements.txt b/sdks/python/container/ml/py311/base_image_requirements.txt index 15a4050ab0f3..5d2717471be0 100644 --- a/sdks/python/container/ml/py311/base_image_requirements.txt +++ b/sdks/python/container/ml/py311/base_image_requirements.txt @@ -131,7 +131,6 @@ mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 multidict==6.6.4 -mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.5 nltk==3.9.1 diff --git a/sdks/python/container/ml/py312/base_image_requirements.txt b/sdks/python/container/ml/py312/base_image_requirements.txt index 488e4e27f486..5cc5d73845c0 100644 --- a/sdks/python/container/ml/py312/base_image_requirements.txt +++ b/sdks/python/container/ml/py312/base_image_requirements.txt @@ -130,7 +130,6 @@ mock==5.2.0 more-itertools==10.7.0 mpmath==1.3.0 multidict==6.6.4 -mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.5 nltk==3.9.1 diff --git a/sdks/python/container/ml/py39/base_image_requirements.txt b/sdks/python/container/ml/py39/base_image_requirements.txt index 3785f612a4af..b5cf7eed05ac 100644 --- a/sdks/python/container/ml/py39/base_image_requirements.txt +++ b/sdks/python/container/ml/py39/base_image_requirements.txt @@ -133,7 +133,6 @@ mock==5.2.0 more-itertools==10.7.0 
mpmath==1.3.0 multidict==6.6.4 -mysql-connector-python==9.4.0 namex==0.1.0 networkx==3.2.1 nltk==3.9.1 diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 3f4deee29713..dce4eb6f75a0 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -116,7 +116,6 @@ mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 multidict==6.6.4 -mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 oauth2client==4.1.3 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index d67181c22ba7..15ddcbb576c1 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -114,7 +114,6 @@ mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 multidict==6.6.4 -mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 oauth2client==4.1.3 diff --git a/sdks/python/container/py312/base_image_requirements.txt b/sdks/python/container/py312/base_image_requirements.txt index 35f29dbfa644..ff68b308aa81 100644 --- a/sdks/python/container/py312/base_image_requirements.txt +++ b/sdks/python/container/py312/base_image_requirements.txt @@ -113,7 +113,6 @@ mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 multidict==6.6.4 -mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 oauth2client==4.1.3 diff --git a/sdks/python/container/py313/base_image_requirements.txt b/sdks/python/container/py313/base_image_requirements.txt index fd7516f43295..c86c61162e2b 100644 --- a/sdks/python/container/py313/base_image_requirements.txt +++ b/sdks/python/container/py313/base_image_requirements.txt @@ -111,7 +111,6 @@ mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 multidict==6.6.4 -mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.2.6 oauth2client==4.1.3 diff --git a/sdks/python/container/py39/base_image_requirements.txt 
b/sdks/python/container/py39/base_image_requirements.txt index 423c66980410..d8be5d49eeaf 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -116,7 +116,6 @@ mmh3==5.2.0 mock==5.2.0 more-itertools==10.7.0 multidict==6.6.4 -mysql-connector-python==9.4.0 nltk==3.9.1 numpy==2.0.2 oauth2client==4.1.3 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index b88034174804..3f37a162f5ea 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -442,7 +442,6 @@ def get_portability_package_data(): 'cryptography>=41.0.2', 'hypothesis>5.0.0,<7.0.0', 'virtualenv-clone>=0.5,<1.0', - 'mysql-connector-python>=9.3.0', 'python-tds>=1.16.1', 'sqlalchemy-pytds>=1.0.2', 'pg8000>=1.31.1', From 81e4db798b71067de24506c76dc083a8cf1ab613 Mon Sep 17 00:00:00 2001 From: kristynsmith Date: Wed, 27 Aug 2025 08:42:16 -0400 Subject: [PATCH 14/29] Fix typos and update test implementation from #35656 (#35958) * implement lambda name pickling in cloudpickle * add enable_lambda_name to __init__ * fix formatting and lint * fix typo * fix code paths in test * fix tests * fix lint * fix formatting and failing test * fix formatting again * remove cloudpickle implementation to leave only typo fixes and fixing test structure. 
* fix _make_function typo * revert regex * fix failing tests * fix formatting * update prefix to not hardcode --- .../internal/code_object_pickler_test.py | 75 ++++++++----------- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/sdks/python/apache_beam/internal/code_object_pickler_test.py b/sdks/python/apache_beam/internal/code_object_pickler_test.py index 2060533e9328..de01f16fd0a7 100644 --- a/sdks/python/apache_beam/internal/code_object_pickler_test.py +++ b/sdks/python/apache_beam/internal/code_object_pickler_test.py @@ -126,30 +126,27 @@ def get_lambda_from_dictionary(): return get_lambda_from_dictionary() +prefix = __name__ + test_cases = [ + (top_level_function, f"{prefix}.top_level_function" + ".__code__"), + (top_level_lambda, f"{prefix}.top_level_lambda" + ".__code__"), ( - top_level_function, - "apache_beam.internal.code_object_pickler_test.top_level_function" - ".__code__"), - ( - top_level_lambda, - "apache_beam.internal.code_object_pickler_test.top_level_lambda" - ".__code__"), - ( - get_nested_function(), - ( - "apache_beam.internal.code_object_pickler_test.get_nested_function" + get_nested_function(), ( + f"{prefix}.get_nested_function" ".__code__.co_consts[nested_function]")), ( get_lambda_from_dictionary(), ( - "apache_beam.internal.code_object_pickler_test" + f"{prefix}" ".get_lambda_from_dictionary.__code__.co_consts[, ('x',)]") ), ( get_lambda_from_dictionary_same_args(), ( - "apache_beam.internal.code_object_pickler_test" + f"{prefix}" ".get_lambda_from_dictionary_same_args.__code__.co_consts" "[, ('x',), " + hashlib.md5( get_lambda_from_dictionary_same_args().__code__.co_code). 
@@ -157,52 +154,46 @@ def get_lambda_from_dictionary(): ( function_with_lambda_default_argument(), ( - "apache_beam.internal.code_object_pickler_test" + f"{prefix}" ".function_with_lambda_default_argument.__defaults__[0].__code__")), ( function_with_function_default_argument(), - "apache_beam.internal.code_object_pickler_test.top_level_function" + f"{prefix}.top_level_function" ".__code__"), - ( - add_one, - "apache_beam.internal.code_object_pickler_test.function_decorator" - ".__code__.co_consts[]"), + (add_one, f"{prefix}.function_decorator" + ".__code__.co_consts[]"), ( ClassWithFunction.process, - "apache_beam.internal.code_object_pickler_test.ClassWithFunction" + f"{prefix}.ClassWithFunction" ".process.__code__"), ( ClassWithStaticMethod.static_method, - "apache_beam.internal.code_object_pickler_test.ClassWithStaticMethod" + f"{prefix}.ClassWithStaticMethod" ".static_method.__code__"), ( ClassWithClassMethod.class_method, - "apache_beam.internal.code_object_pickler_test.ClassWithClassMethod" + f"{prefix}.ClassWithClassMethod" ".class_method.__code__"), ( ClassWithNestedFunction().process(), ( - "apache_beam.internal.code_object_pickler_test" - ".ClassWithNestedFunction.process.__code__.co_consts" + f"{prefix}.ClassWithNestedFunction.process.__code__.co_consts" "[nested_function]")), ( ClassWithLambda().process(), - "apache_beam.internal.code_object_pickler_test.ClassWithLambda.process" - ".__code__.co_consts[]"), + f"{prefix}.ClassWithLambda.process.__code__.co_consts[]"), ( ClassWithNestedClass.InnerClass().process, - "apache_beam.internal.code_object_pickler_test.ClassWithNestedClass" - ".InnerClass.process.__code__"), + f"{prefix}.ClassWithNestedClass.InnerClass.process.__code__"), ( ClassWithNestedLambda().process(), ( - "apache_beam.internal.code_object_pickler_test" + f"{prefix}" ".ClassWithNestedLambda.process.__code__.co_consts" "[get_lambda_from_dictionary].co_consts[, ('x',)]")), ( ClassWithNestedLambda.process, - 
"apache_beam.internal.code_object_pickler_test.ClassWithNestedLambda" - ".process.__code__"), + f"{prefix}.ClassWithNestedLambda.process.__code__"), ] @@ -225,35 +216,35 @@ def test_roundtrip(self, callable, unused_path): class GetCodeFromCodeObjectIdentifierTest(unittest.TestCase): - def empty_path_raises_exception(self): + def test_empty_path_raises_exception(self): with self.assertRaisesRegex(ValueError, "Path must not be empty"): - code_object_pickler.test_get_code_from_identifier("") + code_object_pickler.get_code_from_identifier("") - def invalid_default_index_raises_exception(self): + def test_invalid_default_index_raises_exception(self): with self.assertRaisesRegex(ValueError, "out of bounds"): - code_object_pickler.test_get_code_from_identifier( - "apache_beam.internal.test_cases.module_with_default_argument." + code_object_pickler.get_code_from_identifier( + "apache_beam.internal.test_data.module_with_default_argument." "function_with_lambda_default_argument.__defaults__[1]") - def invalid_single_name_path_raises_exception(self): + def test_invalid_single_name_path_raises_exception(self): with self.assertRaisesRegex(AttributeError, "Could not find code object with path"): code_object_pickler.get_code_from_identifier( - "apache_beam.internal.test_cases.module_3." + "apache_beam.internal.test_data.module_3." "my_function.__code__.co_consts[something]") - def invalid_lambda_with_args_path_raises_exception(self): + def test_invalid_lambda_with_args_path_raises_exception(self): with self.assertRaisesRegex(AttributeError, "Could not find code object with path"): code_object_pickler.get_code_from_identifier( - "apache_beam.internal.test_cases.module_3." + "apache_beam.internal.test_data.module_3." 
"my_function.__code__.co_consts[, ('x',)]") - def invalid_lambda_with_hash_path_raises_exception(self): + def test_invalid_lambda_with_hash_path_raises_exception(self): with self.assertRaisesRegex(AttributeError, "Could not find code object with path"): code_object_pickler.get_code_from_identifier( - "apache_beam.internal.test_cases.module_3." + "apache_beam.internal.test_data.module_3." "my_function.__code__.co_consts[, ('',), 1234567890]") def test_adding_local_variable_in_class_preserves_object(self): From cfd07bed1cb735d422137f24d4197f6ac1b317c5 Mon Sep 17 00:00:00 2001 From: liferoad Date: Wed, 27 Aug 2025 09:37:22 -0400 Subject: [PATCH 15/29] feat(mongodb): upgrade MongoDB Java driver to version 5.5.0 (#35946) * feat(mongodb): upgrade MongoDB Java driver to version 5.5.0 Update MongoDB Java driver from 3.12.11 to 5.5.0 and refactor code to use new API Add mongo-bson dependency required by new driver version Replace deprecated MongoClient with MongoClients and update GridFS implementation * refactor(mongodb): update MongoDB client usage to modern API Replace deprecated MongoClient with MongoClients.create() and update database drop method * build(dependencies): add mongodb driver core dependency Add mongodb-driver-core to support MongoDB Java driver functionality. Also mark mongo_java_driver as permitUnusedDeclared and add testImplementation. * fix(mongodb): update embedded mongo version and fix split key filtering Update embedded MongoDB test dependency to version 3.5.4 and simplify split key filtering logic by using BsonObjectId for range queries. This ensures proper type handling when filtering MongoDB documents by _id field. 
* build: add mongodb-driver-core dependency Add mongodb-driver-core version 5.5.0 to support MongoDB Java driver functionality * use version * refactor: simplify mongo client creation logic Remove redundant null check and consolidate uri handling in MongoDbGridFSIO --- .../beam/gradle/BeamModulePlugin.groovy | 4 +- it/mongodb/build.gradle | 1 + sdks/java/extensions/sql/build.gradle | 4 + .../provider/mongodb/MongoDbReadWriteIT.java | 7 +- sdks/java/io/mongodb/build.gradle | 5 +- .../apache/beam/sdk/io/mongodb/FindQuery.java | 5 +- .../beam/sdk/io/mongodb/MongoDbGridFSIO.java | 153 +++++++++--------- .../apache/beam/sdk/io/mongodb/MongoDbIO.java | 147 +++++++++-------- .../beam/sdk/io/mongodb/FindQueryTest.java | 5 +- .../sdk/io/mongodb/MongoDBGridFSIOTest.java | 114 ++++++------- .../beam/sdk/io/mongodb/MongoDbIOTest.java | 5 +- 11 files changed, 246 insertions(+), 204 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 012c8d225714..95a33a8520d8 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -840,7 +840,9 @@ class BeamModulePlugin implements Plugin { log4j2_log4j12_api : "org.apache.logging.log4j:log4j-1.2-api:$log4j2_version", mockito_core : "org.mockito:mockito-core:4.11.0", mockito_inline : "org.mockito:mockito-inline:4.11.0", - mongo_java_driver : "org.mongodb:mongo-java-driver:3.12.11", + mongo_java_driver : "org.mongodb:mongodb-driver-sync:5.5.0", + mongo_bson : "org.mongodb:bson:5.5.0", + mongodb_driver_core : "org.mongodb:mongodb-driver-core:5.5.0", nemo_compiler_frontend_beam : "org.apache.nemo:nemo-compiler-frontend-beam:$nemo_version", netty_all : "io.netty:netty-all:$netty_version", netty_handler : "io.netty:netty-handler:$netty_version", diff --git a/it/mongodb/build.gradle b/it/mongodb/build.gradle index 
6be9b91f5b34..960e15af8394 100644 --- a/it/mongodb/build.gradle +++ b/it/mongodb/build.gradle @@ -35,6 +35,7 @@ dependencies { implementation library.java.testcontainers_mongodb implementation library.java.google_code_gson implementation library.java.mongo_java_driver + implementation library.java.mongo_bson implementation library.java.vendored_guava_32_1_2_jre testImplementation library.java.mockito_core diff --git a/sdks/java/extensions/sql/build.gradle b/sdks/java/extensions/sql/build.gradle index af8b6cba1742..5527493200f7 100644 --- a/sdks/java/extensions/sql/build.gradle +++ b/sdks/java/extensions/sql/build.gradle @@ -92,6 +92,9 @@ dependencies { implementation "org.codehaus.janino:commons-compiler:3.0.11" implementation library.java.jackson_core implementation library.java.mongo_java_driver + permitUnusedDeclared library.java.mongo_java_driver + implementation library.java.mongo_bson + implementation library.java.mongodb_driver_core implementation library.java.slf4j_api implementation library.java.joda_time implementation library.java.vendored_guava_32_1_2_jre @@ -131,6 +134,7 @@ dependencies { testImplementation library.java.kafka_clients testImplementation project(":sdks:java:io:kafka") testImplementation project(path: ":sdks:java:io:mongodb", configuration: "testRuntimeMigration") + testImplementation library.java.mongo_java_driver testImplementation project(path: ":sdks:java:io:thrift", configuration: "testRuntimeMigration") testImplementation project(path: ":sdks:java:extensions:protobuf", configuration: "testRuntimeMigration") testCompileOnly project(":sdks:java:extensions:sql:udf-test-provider") diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/mongodb/MongoDbReadWriteIT.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/mongodb/MongoDbReadWriteIT.java index 76be08fe9a6e..804639cacfc3 100644 --- 
a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/mongodb/MongoDbReadWriteIT.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/mongodb/MongoDbReadWriteIT.java @@ -31,7 +31,8 @@ import static org.hamcrest.core.IsInstanceOf.instanceOf; import com.mongodb.BasicDBObject; -import com.mongodb.MongoClient; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import com.mongodb.client.model.Filters; @@ -128,14 +129,14 @@ public static void setUp() throws Exception { .build(); mongodExecutable = mongodStarter.prepare(mongodConfig); mongodProcess = mongodExecutable.start(); - client = new MongoClient(hostname, port); + client = MongoClients.create("mongodb://" + hostname + ":" + port); mongoSqlUrl = String.format("mongodb://%s:%d/%s/%s", hostname, port, database, collection); } @AfterClass public static void tearDown() throws Exception { - client.dropDatabase(database); + client.getDatabase(database).drop(); client.close(); mongodProcess.stop(); mongodExecutable.stop(); diff --git a/sdks/java/io/mongodb/build.gradle b/sdks/java/io/mongodb/build.gradle index b9e90082f0dc..56d29750dead 100644 --- a/sdks/java/io/mongodb/build.gradle +++ b/sdks/java/io/mongodb/build.gradle @@ -28,13 +28,14 @@ dependencies { implementation project(path: ":sdks:java:core", configuration: "shadow") implementation library.java.joda_time implementation library.java.mongo_java_driver + implementation library.java.mongo_bson + implementation library.java.mongodb_driver_core implementation library.java.slf4j_api implementation library.java.vendored_guava_32_1_2_jre testImplementation library.java.junit testImplementation project(path: ":sdks:java:io:common") testImplementation project(path: ":sdks:java:testing:test-utils") - testImplementation "de.flapdoodle.embed:de.flapdoodle.embed.mongo:3.0.0" - 
testImplementation "de.flapdoodle.embed:de.flapdoodle.embed.process:3.0.0" + testImplementation "de.flapdoodle.embed:de.flapdoodle.embed.mongo:3.5.4" testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } diff --git a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/FindQuery.java b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/FindQuery.java index 2131656d458a..d89db9dea54b 100644 --- a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/FindQuery.java +++ b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/FindQuery.java @@ -21,7 +21,7 @@ import com.google.auto.value.AutoValue; import com.mongodb.BasicDBObject; -import com.mongodb.MongoClient; +import com.mongodb.MongoClientSettings; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.model.Projections; @@ -79,7 +79,8 @@ private FindQuery withFilters(BsonDocument filters) { /** Convert the Bson filters into a BsonDocument via default encoding. */ static BsonDocument bson2BsonDocument(Bson filters) { - return filters.toBsonDocument(BasicDBObject.class, MongoClient.getDefaultCodecRegistry()); + return filters.toBsonDocument( + BasicDBObject.class, MongoClientSettings.getDefaultCodecRegistry()); } /** Sets the filters to find. 
*/ diff --git a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbGridFSIO.java b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbGridFSIO.java index 07cc238c7e6b..71f8b291e0d5 100644 --- a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbGridFSIO.java +++ b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbGridFSIO.java @@ -21,15 +21,18 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; import com.google.auto.value.AutoValue; -import com.mongodb.DB; -import com.mongodb.DBCursor; -import com.mongodb.DBObject; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; -import com.mongodb.gridfs.GridFS; -import com.mongodb.gridfs.GridFSDBFile; -import com.mongodb.gridfs.GridFSInputFile; -import com.mongodb.util.JSON; +import com.mongodb.ConnectionString; +import com.mongodb.MongoClientSettings; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; +import com.mongodb.client.MongoCursor; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.gridfs.GridFSBucket; +import com.mongodb.client.gridfs.GridFSBuckets; +import com.mongodb.client.gridfs.GridFSDownloadStream; +import com.mongodb.client.gridfs.GridFSUploadStream; +import com.mongodb.client.gridfs.model.GridFSFile; +import com.mongodb.client.gridfs.model.GridFSUploadOptions; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -53,6 +56,7 @@ import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PDone; +import org.bson.Document; import org.bson.types.ObjectId; import org.checkerframework.checker.nullness.qual.Nullable; import org.checkerframework.dataflow.qual.Pure; @@ -117,16 +121,18 @@ public class MongoDbGridFSIO { /** Callback for the parser to use to submit data. 
*/ public interface ParserCallback extends Serializable { - /** Output the object. The default timestamp will be the GridFSDBFile creation timestamp. */ + /** Output the object. The default timestamp will be the GridFSFile creation timestamp. */ void output(T output); /** Output the object using the specified timestamp. */ void output(T output, Instant timestamp); } - /** Interface for the parser that is used to parse the GridFSDBFile into the appropriate types. */ + /** Interface for the parser that is used to parse the GridFSFile into the appropriate types. */ public interface Parser extends Serializable { - void parse(GridFSDBFile input, ParserCallback callback) throws IOException; + void parse( + GridFSFile gridFSFile, GridFSDownloadStream downloadStream, ParserCallback callback) + throws IOException; } /** @@ -134,11 +140,10 @@ public interface Parser extends Serializable { * file into Strings. It uses the timestamp of the file for the event timestamp. */ private static final Parser TEXT_PARSER = - (input, callback) -> { - final Instant time = new Instant(input.getUploadDate().getTime()); + (gridFSFile, downloadStream, callback) -> { + final Instant time = new Instant(gridFSFile.getUploadDate().getTime()); try (BufferedReader reader = - new BufferedReader( - new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8))) { + new BufferedReader(new InputStreamReader(downloadStream, StandardCharsets.UTF_8))) { for (String line = reader.readLine(); line != null; line = reader.readLine()) { callback.output(line, time); } @@ -197,12 +202,20 @@ static ConnectionConfiguration create( } MongoClient setupMongo() { - return uri() == null ? 
new MongoClient() : new MongoClient(new MongoClientURI(uri())); + if (uri() == null) { + return MongoClients.create(); + } + MongoClientSettings settings = + MongoClientSettings.builder() + .applyConnectionString(new ConnectionString(Preconditions.checkStateNotNull(uri()))) + .build(); + return MongoClients.create(settings); } - GridFS setupGridFS(MongoClient mongo) { - DB db = database() == null ? mongo.getDB("gridfs") : mongo.getDB(database()); - return bucket() == null ? new GridFS(db) : new GridFS(db, bucket()); + GridFSBucket setupGridFS(MongoClient mongo) { + MongoDatabase db = + database() == null ? mongo.getDatabase("gridfs") : mongo.getDatabase(database()); + return bucket() == null ? GridFSBuckets.create(db) : GridFSBuckets.create(db, bucket()); } } @@ -313,12 +326,12 @@ public PCollection expand(PBegin input) { ParDo.of( new DoFn() { @Nullable MongoClient mongo; - @Nullable GridFS gridfs; + @Nullable GridFSBucket gridFSBucket; @Setup public void setup() { mongo = source.spec.connectionConfiguration().setupMongo(); - gridfs = source.spec.connectionConfiguration().setupGridFS(mongo); + gridFSBucket = source.spec.connectionConfiguration().setupGridFS(mongo); } @Teardown @@ -331,12 +344,18 @@ public void teardown() { @ProcessElement public void processElement(final ProcessContext c) throws IOException { - Preconditions.checkStateNotNull(gridfs); + GridFSBucket bucket = Preconditions.checkStateNotNull(gridFSBucket); ObjectId oid = c.element(); - GridFSDBFile file = gridfs.find(oid); + GridFSDownloadStream downloadStream = bucket.openDownloadStream(oid); + GridFSFile gridFSFile = + bucket.find(com.mongodb.client.model.Filters.eq("_id", oid)).first(); + if (gridFSFile == null) { + return; // Skip if file not found + } Parser parser = Preconditions.checkStateNotNull(parser()); parser.parse( - file, + gridFSFile, + downloadStream, new ParserCallback() { @Override public void output(T output, Instant timestamp) { @@ -378,12 +397,12 @@ protected static class 
BoundedGridFSSource extends BoundedSource { this.objectIds = objectIds; } - private DBCursor createCursor(GridFS gridfs) { + private MongoCursor createCursor(GridFSBucket gridFSBucket) { if (spec.filter() != null) { - DBObject query = (DBObject) JSON.parse(spec.filter()); - return gridfs.getFileList(query); + Document query = Document.parse(spec.filter()); + return gridFSBucket.find(query).iterator(); } - return gridfs.getFileList(); + return gridFSBucket.find().iterator(); } @Override @@ -391,20 +410,20 @@ public List> split( long desiredBundleSizeBytes, PipelineOptions options) throws Exception { MongoClient mongo = spec.connectionConfiguration().setupMongo(); try { - GridFS gridfs = spec.connectionConfiguration().setupGridFS(mongo); - DBCursor cursor = createCursor(gridfs); + GridFSBucket gridFSBucket = spec.connectionConfiguration().setupGridFS(mongo); + MongoCursor cursor = createCursor(gridFSBucket); long size = 0; List list = new ArrayList<>(); List objects = new ArrayList<>(); while (cursor.hasNext()) { - GridFSDBFile file = (GridFSDBFile) cursor.next(); + GridFSFile file = cursor.next(); long len = file.getLength(); if ((size + len) > desiredBundleSizeBytes && !objects.isEmpty()) { list.add(new BoundedGridFSSource(spec, objects)); size = 0; objects = new ArrayList<>(); } - objects.add((ObjectId) file.getId()); + objects.add(file.getObjectId()); size += len; } if (!objects.isEmpty() || list.isEmpty()) { @@ -419,10 +438,11 @@ public List> split( @Override public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { try (MongoClient mongo = spec.connectionConfiguration().setupMongo(); - DBCursor cursor = createCursor(spec.connectionConfiguration().setupGridFS(mongo))) { + MongoCursor cursor = + createCursor(spec.connectionConfiguration().setupGridFS(mongo))) { long size = 0; while (cursor.hasNext()) { - GridFSDBFile file = (GridFSDBFile) cursor.next(); + GridFSFile file = cursor.next(); size += file.getLength(); } return size; @@ -456,7 
+476,7 @@ static class GridFSReader extends BoundedSource.BoundedReader { final @Nullable List objects; @Nullable MongoClient mongo; - @Nullable DBCursor cursor; + @Nullable MongoCursor cursor; @Nullable Iterator iterator; @Nullable ObjectId current; @@ -474,8 +494,8 @@ public BoundedSource getCurrentSource() { public boolean start() throws IOException { if (objects == null) { mongo = source.spec.connectionConfiguration().setupMongo(); - GridFS gridfs = source.spec.connectionConfiguration().setupGridFS(mongo); - cursor = source.createCursor(gridfs); + GridFSBucket gridFSBucket = source.spec.connectionConfiguration().setupGridFS(mongo); + cursor = source.createCursor(gridFSBucket); } else { iterator = objects.iterator(); } @@ -488,8 +508,8 @@ public boolean advance() throws IOException { current = iterator.next(); return true; } else if (cursor != null && cursor.hasNext()) { - GridFSDBFile file = (GridFSDBFile) cursor.next(); - current = (ObjectId) file.getId(); + GridFSFile file = cursor.next(); + current = file.getObjectId(); return true; } current = null; @@ -628,9 +648,9 @@ private static class GridFsWriteFn extends DoFn { private final Write spec; private transient @Nullable MongoClient mongo; - private transient @Nullable GridFS gridfs; + private transient @Nullable GridFSBucket gridFSBucket; - private transient @Nullable GridFSInputFile gridFsFile; + private transient @Nullable GridFSUploadStream gridFsUploadStream; private transient @Nullable OutputStream outputStream; public GridFsWriteFn(Write spec) { @@ -640,20 +660,22 @@ public GridFsWriteFn(Write spec) { @Setup public void setup() throws Exception { mongo = spec.connectionConfiguration().setupMongo(); - gridfs = spec.connectionConfiguration().setupGridFS(mongo); + gridFSBucket = spec.connectionConfiguration().setupGridFS(mongo); } @StartBundle public void startBundle() { - GridFS gridfs = Preconditions.checkStateNotNull(this.gridfs); + GridFSBucket gridFSBucket = 
Preconditions.checkStateNotNull(this.gridFSBucket); String filename = Preconditions.checkStateNotNull(spec.filename()); - GridFSInputFile gridFsFile = gridfs.createFile(filename); + if (spec.chunkSize() != null) { - gridFsFile.setChunkSize(spec.chunkSize()); + gridFsUploadStream = + gridFSBucket.openUploadStream( + filename, new GridFSUploadOptions().chunkSizeBytes(spec.chunkSize().intValue())); + } else { + gridFsUploadStream = gridFSBucket.openUploadStream(filename); } - outputStream = gridFsFile.getOutputStream(); - - this.gridFsFile = gridFsFile; + outputStream = gridFsUploadStream; } @ProcessElement @@ -665,35 +687,20 @@ public void processElement(ProcessContext context) throws Exception { @FinishBundle public void finishBundle() throws Exception { - if (outputStream != null) { - OutputStream outputStream = this.outputStream; - outputStream.flush(); - outputStream.close(); - this.outputStream = null; - } - if (gridFsFile != null) { - gridFsFile = null; + GridFSUploadStream uploadStream = gridFsUploadStream; + if (uploadStream != null) { + uploadStream.flush(); + uploadStream.close(); + gridFsUploadStream = null; + outputStream = null; } } @Teardown public void teardown() throws Exception { - try { - if (outputStream != null) { - OutputStream outputStream = this.outputStream; - outputStream.flush(); - outputStream.close(); - this.outputStream = null; - } - if (gridFsFile != null) { - gridFsFile = null; - } - } finally { - if (mongo != null) { - mongo.close(); - mongo = null; - gridfs = null; - } + if (mongo != null) { + mongo.close(); + mongo = null; } } } diff --git a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbIO.java b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbIO.java index 905c7418e26c..1283e873f2b6 100644 --- a/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbIO.java +++ b/sdks/java/io/mongodb/src/main/java/org/apache/beam/sdk/io/mongodb/MongoDbIO.java @@ -22,12 +22,14 @@ 
import com.google.auto.value.AutoValue; import com.mongodb.BasicDBObject; +import com.mongodb.ConnectionString; import com.mongodb.MongoBulkWriteException; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientOptions; -import com.mongodb.MongoClientURI; +import com.mongodb.MongoClientSettings; +import com.mongodb.MongoClientSettings.Builder; import com.mongodb.MongoCommandException; import com.mongodb.client.AggregateIterable; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; @@ -46,6 +48,7 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Optional; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.net.ssl.SSLContext; import org.apache.beam.sdk.coders.Coder; @@ -64,6 +67,7 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.bson.BsonDocument; import org.bson.BsonInt32; +import org.bson.BsonObjectId; import org.bson.BsonString; import org.bson.Document; import org.bson.conversions.Bson; @@ -362,22 +366,25 @@ public void populateDisplayData(DisplayData.Builder builder) { } } - private static MongoClientOptions.Builder getOptions( + private static MongoClientSettings.Builder getOptions( int maxConnectionIdleTime, boolean sslEnabled, boolean sslInvalidHostNameAllowed, boolean ignoreSSLCertificate) { - MongoClientOptions.Builder optionsBuilder = new MongoClientOptions.Builder(); - optionsBuilder.maxConnectionIdleTime(maxConnectionIdleTime); + MongoClientSettings.Builder settingsBuilder = MongoClientSettings.builder(); + settingsBuilder.applyToConnectionPoolSettings( + builder -> builder.maxConnectionIdleTime(maxConnectionIdleTime, TimeUnit.MILLISECONDS)); if (sslEnabled) { - optionsBuilder.sslEnabled(sslEnabled).sslInvalidHostNameAllowed(sslInvalidHostNameAllowed); - if 
(ignoreSSLCertificate) { - SSLContext sslContext = SSLUtils.ignoreSSLCertificate(); - optionsBuilder.sslContext(sslContext); - optionsBuilder.socketFactory(sslContext.getSocketFactory()); - } + settingsBuilder.applyToSslSettings( + builder -> { + builder.enabled(sslEnabled).invalidHostNameAllowed(sslInvalidHostNameAllowed); + if (ignoreSSLCertificate) { + SSLContext sslContext = SSLUtils.ignoreSSLCertificate(); + builder.context(sslContext); + } + }); } - return optionsBuilder; + return settingsBuilder; } /** A MongoDB {@link BoundedSource} reading {@link Document} from a given instance. */ @@ -414,15 +421,15 @@ long getDocumentCount() { String uri = Preconditions.checkStateNotNull(spec.uri()); String database = Preconditions.checkStateNotNull(spec.database()); String collection = Preconditions.checkStateNotNull(spec.collection()); - try (MongoClient mongoClient = - new MongoClient( - new MongoClientURI( - uri, - getOptions( - spec.maxConnectionIdleTime(), - spec.sslEnabled(), - spec.sslInvalidHostNameAllowed(), - spec.ignoreSSLCertificate())))) { + MongoClientSettings settings = + getOptions( + spec.maxConnectionIdleTime(), + spec.sslEnabled(), + spec.sslInvalidHostNameAllowed(), + spec.ignoreSSLCertificate()) + .applyConnectionString(new ConnectionString(uri)) + .build(); + try (MongoClient mongoClient = MongoClients.create(settings)) { return getDocumentCount(mongoClient, database, collection); } catch (Exception e) { return -1; @@ -446,15 +453,15 @@ public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) { String uri = Preconditions.checkStateNotNull(spec.uri()); String database = Preconditions.checkStateNotNull(spec.database()); String collection = Preconditions.checkStateNotNull(spec.collection()); - try (MongoClient mongoClient = - new MongoClient( - new MongoClientURI( - uri, - getOptions( - spec.maxConnectionIdleTime(), - spec.sslEnabled(), - spec.sslInvalidHostNameAllowed(), - spec.ignoreSSLCertificate())))) { + MongoClientSettings settings = + 
getOptions( + spec.maxConnectionIdleTime(), + spec.sslEnabled(), + spec.sslInvalidHostNameAllowed(), + spec.ignoreSSLCertificate()) + .applyConnectionString(new ConnectionString(uri)) + .build(); + try (MongoClient mongoClient = MongoClients.create(settings)) { try { return getEstimatedSizeBytes(mongoClient, database, collection); } catch (MongoCommandException exception) { @@ -483,15 +490,15 @@ public List> split( String uri = Preconditions.checkStateNotNull(spec.uri()); String database = Preconditions.checkStateNotNull(spec.database()); String collection = Preconditions.checkStateNotNull(spec.collection()); - try (MongoClient mongoClient = - new MongoClient( - new MongoClientURI( - uri, - getOptions( - spec.maxConnectionIdleTime(), - spec.sslEnabled(), - spec.sslInvalidHostNameAllowed(), - spec.ignoreSSLCertificate())))) { + MongoClientSettings settings = + getOptions( + spec.maxConnectionIdleTime(), + spec.sslEnabled(), + spec.sslInvalidHostNameAllowed(), + spec.ignoreSSLCertificate()) + .applyConnectionString(new ConnectionString(uri)) + .build(); + try (MongoClient mongoClient = MongoClients.create(settings)) { MongoDatabase mongoDatabase = mongoClient.getDatabase(database); List splitKeys; @@ -671,26 +678,39 @@ static List splitKeysToMatch(List splitKeys) { if (i == 0) { aggregates.add(Aggregates.match(Filters.lte("_id", splitKey))); if (splitKeys.size() == 1) { - aggregates.add(Aggregates.match(Filters.and(Filters.gt("_id", splitKey)))); + aggregates.add(Aggregates.match(Filters.gt("_id", splitKey))); } } else if (i == splitKeys.size() - 1) { // this is the last split in the list, the filters define // the range from the previous split to the current split and also // the current split to the end - aggregates.add( - Aggregates.match( - Filters.and(Filters.gt("_id", lowestBound), Filters.lte("_id", splitKey)))); - aggregates.add(Aggregates.match(Filters.and(Filters.gt("_id", splitKey)))); + // Create a custom BSON document with multiple conditions on the same 
field + BsonDocument rangeFilter = + new BsonDocument( + "_id", + new BsonDocument( + "$gt", new BsonObjectId(Preconditions.checkStateNotNull(lowestBound))) + .append("$lte", new BsonObjectId(splitKey))); + aggregates.add(Aggregates.match(rangeFilter)); + aggregates.add(Aggregates.match(Filters.gt("_id", splitKey))); } else { - aggregates.add( - Aggregates.match( - Filters.and(Filters.gt("_id", lowestBound), Filters.lte("_id", splitKey)))); + // Create a custom BSON document with multiple conditions on the same field + BsonDocument rangeFilter = + new BsonDocument( + "_id", + new BsonDocument( + "$gt", new BsonObjectId(Preconditions.checkStateNotNull(lowestBound))) + .append("$lte", new BsonObjectId(splitKey))); + aggregates.add(Aggregates.match(rangeFilter)); } lowestBound = splitKey; } return aggregates.stream() - .map(s -> s.toBsonDocument(BasicDBObject.class, MongoClient.getDefaultCodecRegistry())) + .map( + s -> + s.toBsonDocument( + BasicDBObject.class, MongoClientSettings.getDefaultCodecRegistry())) .collect(Collectors.toList()); } @@ -786,14 +806,15 @@ public void close() { private MongoClient createClient(Read spec) { String uri = Preconditions.checkStateNotNull(spec.uri(), "withUri() is required"); - return new MongoClient( - new MongoClientURI( - uri, - getOptions( + MongoClientSettings settings = + getOptions( spec.maxConnectionIdleTime(), spec.sslEnabled(), spec.sslInvalidHostNameAllowed(), - spec.ignoreSSLCertificate()))); + spec.ignoreSSLCertificate()) + .applyConnectionString(new ConnectionString(uri)) + .build(); + return MongoClients.create(settings); } } @@ -985,15 +1006,15 @@ static class WriteFn extends DoFn { @Setup public void createMongoClient() { String uri = Preconditions.checkStateNotNull(spec.uri()); - client = - new MongoClient( - new MongoClientURI( - uri, - getOptions( - spec.maxConnectionIdleTime(), - spec.sslEnabled(), - spec.sslInvalidHostNameAllowed(), - spec.ignoreSSLCertificate()))); + MongoClientSettings settings = + 
getOptions( + spec.maxConnectionIdleTime(), + spec.sslEnabled(), + spec.sslInvalidHostNameAllowed(), + spec.ignoreSSLCertificate()) + .applyConnectionString(new ConnectionString(uri)) + .build(); + client = MongoClients.create(settings); } @StartBundle diff --git a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/FindQueryTest.java b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/FindQueryTest.java index df66179f3904..da90f92dc190 100644 --- a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/FindQueryTest.java +++ b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/FindQueryTest.java @@ -21,7 +21,7 @@ import com.google.auto.value.AutoValue; import com.mongodb.BasicDBObject; -import com.mongodb.MongoClient; +import com.mongodb.MongoClientSettings; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.model.Projections; @@ -79,7 +79,8 @@ private FindQueryTest withFilters(BsonDocument filters) { /** Convert the Bson filters into a BsonDocument via default encoding. */ static BsonDocument bson2BsonDocument(Bson filters) { - return filters.toBsonDocument(BasicDBObject.class, MongoClient.getDefaultCodecRegistry()); + return filters.toBsonDocument( + BasicDBObject.class, MongoClientSettings.getDefaultCodecRegistry()); } /** Sets the filters to find. 
*/ diff --git a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBGridFSIOTest.java b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBGridFSIOTest.java index 09343606f228..d13185a08fb6 100644 --- a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBGridFSIOTest.java +++ b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDBGridFSIOTest.java @@ -20,11 +20,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import com.mongodb.DB; -import com.mongodb.MongoClient; -import com.mongodb.gridfs.GridFS; -import com.mongodb.gridfs.GridFSDBFile; -import com.mongodb.gridfs.GridFSInputFile; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.gridfs.GridFSBucket; +import com.mongodb.client.gridfs.GridFSBuckets; +import com.mongodb.client.gridfs.GridFSUploadStream; +import com.mongodb.client.gridfs.model.GridFSFile; import de.flapdoodle.embed.mongo.MongodExecutable; import de.flapdoodle.embed.mongo.MongodProcess; import de.flapdoodle.embed.mongo.MongodStarter; @@ -35,12 +37,10 @@ import de.flapdoodle.embed.mongo.distribution.Version; import de.flapdoodle.embed.process.runtime.Network; import java.io.BufferedReader; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.OutputStream; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -117,9 +117,9 @@ public static void start() throws Exception { LOG.info("Insert test data"); - MongoClient client = new MongoClient("localhost", port); - DB database = client.getDB(DATABASE); - GridFS gridfs = new GridFS(database); + MongoClient client = MongoClients.create("mongodb://localhost:" + port); + MongoDatabase database = 
client.getDatabase(DATABASE); + GridFSBucket gridfs = GridFSBuckets.create(database); ByteArrayOutputStream out = new ByteArrayOutputStream(); for (int x = 0; x < 100; x++) { @@ -129,10 +129,12 @@ public static void start() throws Exception { .getBytes(StandardCharsets.UTF_8)); } for (int x = 0; x < 5; x++) { - gridfs.createFile(new ByteArrayInputStream(out.toByteArray()), "file" + x).save(); + try (GridFSUploadStream uploadStream = gridfs.openUploadStream("file" + x)) { + uploadStream.write(out.toByteArray()); + } } - gridfs = new GridFS(database, "mapBucket"); + GridFSBucket mapBucketGridfs = GridFSBuckets.create(database, "mapBucket"); long now = System.currentTimeMillis(); Random random = new Random(); String[] scientists = { @@ -148,26 +150,25 @@ public static void start() throws Exception { "Maxwell" }; for (int x = 0; x < 10; x++) { - GridFSInputFile file = gridfs.createFile("file_" + x); - OutputStream outf = file.getOutputStream(); - OutputStreamWriter writer = new OutputStreamWriter(outf, StandardCharsets.UTF_8); - for (int y = 0; y < 5000; y++) { - long time = now - random.nextInt(3600000); - String name = scientists[y % scientists.length]; - writer.write(time + "\t"); - writer.write(name + "\t"); - writer.write(Integer.toString(random.nextInt(100))); - writer.write("\n"); - } - for (int y = 0; y < scientists.length; y++) { - String name = scientists[y % scientists.length]; - writer.write(now + "\t"); - writer.write(name + "\t"); - writer.write("101"); - writer.write("\n"); + try (GridFSUploadStream uploadStream = mapBucketGridfs.openUploadStream("file_" + x)) { + OutputStreamWriter writer = new OutputStreamWriter(uploadStream, StandardCharsets.UTF_8); + for (int y = 0; y < 5000; y++) { + long time = now - random.nextInt(3600000); + String name = scientists[y % scientists.length]; + writer.write(time + "\t"); + writer.write(name + "\t"); + writer.write(Integer.toString(random.nextInt(100))); + writer.write("\n"); + } + for (int y = 0; y < 
scientists.length; y++) { + String name = scientists[y % scientists.length]; + writer.write(now + "\t"); + writer.write(name + "\t"); + writer.write("101"); + writer.write("\n"); + } + writer.flush(); } - writer.flush(); - writer.close(); } client.close(); } @@ -208,11 +209,10 @@ public void testReadWithParser() { .withDatabase(DATABASE) .withBucket("mapBucket") .>withParser( - (input, callback) -> { + (gridFSFile, downloadStream, callback) -> { try (final BufferedReader reader = new BufferedReader( - new InputStreamReader( - input.getInputStream(), StandardCharsets.UTF_8))) { + new InputStreamReader(downloadStream, StandardCharsets.UTF_8))) { String line = reader.readLine(); while (line != null) { try (Scanner scanner = new Scanner(line.trim())) { @@ -311,19 +311,20 @@ public void testWriteMessage() throws Exception { MongoClient client = null; try { StringBuilder results = new StringBuilder(); - client = new MongoClient("localhost", port); - DB database = client.getDB(DATABASE); - GridFS gridfs = new GridFS(database, "WriteTest"); - List files = gridfs.find("WriteTestData"); - assertTrue(files.size() > 0); - for (GridFSDBFile file : files) { - assertEquals(100, file.getChunkSize()); - int l = (int) file.getLength(); - try (InputStream ins = file.getInputStream()) { - DataInputStream dis = new DataInputStream(ins); - byte[] b = new byte[l]; - dis.readFully(b); - results.append(new String(b, StandardCharsets.UTF_8)); + client = MongoClients.create("mongodb://localhost:" + port); + MongoDatabase database = client.getDatabase(DATABASE); + GridFSBucket gridfs = GridFSBuckets.create(database, "WriteTest"); + + for (GridFSFile file : gridfs.find()) { + if (file.getFilename().equals("WriteTestData")) { + assertEquals(100, file.getChunkSize()); + int l = (int) file.getLength(); + try (InputStream ins = gridfs.openDownloadStream(file.getObjectId())) { + DataInputStream dis = new DataInputStream(ins); + byte[] b = new byte[l]; + dis.readFully(b); + results.append(new 
String(b, StandardCharsets.UTF_8)); + } } } String dataString = results.toString(); @@ -331,16 +332,17 @@ public void testWriteMessage() throws Exception { assertTrue(dataString.contains("Message " + x)); } - files = gridfs.find("WriteTestIntData"); boolean[] intResults = new boolean[100]; - for (GridFSDBFile file : files) { - int l = (int) file.getLength(); - try (InputStream ins = file.getInputStream()) { - DataInputStream dis = new DataInputStream(ins); - byte[] b = new byte[l]; - dis.readFully(b); - for (byte aB : b) { - intResults[aB] = true; + for (GridFSFile file : gridfs.find()) { + if (file.getFilename().equals("WriteTestIntData")) { + int l = (int) file.getLength(); + try (InputStream ins = gridfs.openDownloadStream(file.getObjectId())) { + DataInputStream dis = new DataInputStream(ins); + byte[] b = new byte[l]; + dis.readFully(b); + for (byte aB : b) { + intResults[aB] = true; + } } } } diff --git a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDbIOTest.java b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDbIOTest.java index 4dda988e355c..cc85db937975 100644 --- a/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDbIOTest.java +++ b/sdks/java/io/mongodb/src/test/java/org/apache/beam/sdk/io/mongodb/MongoDbIOTest.java @@ -21,7 +21,8 @@ import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; -import com.mongodb.MongoClient; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; import com.mongodb.client.model.Filters; @@ -107,7 +108,7 @@ public static void beforeClass() throws Exception { .build(); mongodExecutable = mongodStarter.prepare(mongodConfig); mongodProcess = mongodExecutable.start(); - client = new MongoClient("localhost", port); + client = MongoClients.create("mongodb://localhost:" + port); database = 
client.getDatabase(DATABASE_NAME); LOG.info("Insert test data"); From 7f90455bb1089cdc1daa82b55e5cc269bbd0a2c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 10:21:11 -0400 Subject: [PATCH 16/29] Bump github.com/aws/aws-sdk-go-v2/credentials in /sdks (#35974) Bumps [github.com/aws/aws-sdk-go-v2/credentials](https://github.com/aws/aws-sdk-go-v2) from 1.18.6 to 1.18.7. - [Release notes](https://github.com/aws/aws-sdk-go-v2/releases) - [Changelog](https://github.com/aws/aws-sdk-go-v2/blob/config/v1.18.7/CHANGELOG.md) - [Commits](https://github.com/aws/aws-sdk-go-v2/compare/config/v1.18.6...config/v1.18.7) --- updated-dependencies: - dependency-name: github.com/aws/aws-sdk-go-v2/credentials dependency-version: 1.18.7 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- sdks/go.mod | 4 ++-- sdks/go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index d9b3791ffe16..3e084a51bc2d 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -34,7 +34,7 @@ require ( cloud.google.com/go/storage v1.56.0 github.com/aws/aws-sdk-go-v2 v1.38.1 github.com/aws/aws-sdk-go-v2/config v1.31.2 - github.com/aws/aws-sdk-go-v2/credentials v1.18.6 + github.com/aws/aws-sdk-go-v2/credentials v1.18.7 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.18.5 github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 github.com/aws/smithy-go v1.22.5 @@ -156,7 +156,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.3 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.28.2 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.0 // indirect 
github.com/aws/aws-sdk-go-v2/service/sts v1.38.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect diff --git a/sdks/go.sum b/sdks/go.sum index d6f950f7ceba..dc0560215933 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -761,8 +761,8 @@ github.com/aws/aws-sdk-go-v2/config v1.31.2 h1:NOaSZpVGEH2Np/c1toSeW0jooNl+9ALms github.com/aws/aws-sdk-go-v2/config v1.31.2/go.mod h1:17ft42Yb2lF6OigqSYiDAiUcX4RIkEMY6XxEMJsrAes= github.com/aws/aws-sdk-go-v2/credentials v1.11.2/go.mod h1:j8YsY9TXTm31k4eFhspiQicfXPLZ0gYXA50i4gxPE8g= github.com/aws/aws-sdk-go-v2/credentials v1.16.2/go.mod h1:sDdvGhXrSVT5yzBDR7qXz+rhbpiMpUYfF3vJ01QSdrc= -github.com/aws/aws-sdk-go-v2/credentials v1.18.6 h1:AmmvNEYrru7sYNJnp3pf57lGbiarX4T9qU/6AZ9SucU= -github.com/aws/aws-sdk-go-v2/credentials v1.18.6/go.mod h1:/jdQkh1iVPa01xndfECInp1v1Wnp70v3K4MvtlLGVEc= +github.com/aws/aws-sdk-go-v2/credentials v1.18.7 h1:zqg4OMrKj+t5HlswDApgvAHjxKtlduKS7KicXB+7RLg= +github.com/aws/aws-sdk-go-v2/credentials v1.18.7/go.mod h1:/4M5OidTskkgkv+nCIfC9/tbiQ/c8qTox9QcUDV0cgc= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.3/go.mod h1:uk1vhHHERfSVCUnqSqz8O48LBYDSC+k6brng09jcMOk= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.4/go.mod h1:t4i+yGHMCcUNIX1x7YVYa6bH/Do7civ5I6cG/6PMfyA= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.4 h1:lpdMwTzmuDLkgW7086jE94HweHCqG+uOJwHf3LZs7T0= @@ -816,8 +816,8 @@ github.com/aws/aws-sdk-go-v2/service/sso v1.17.2/go.mod h1:/pE21vno3q1h4bbhUOEi+ github.com/aws/aws-sdk-go-v2/service/sso v1.28.2 h1:ve9dYBB8CfJGTFqcQ3ZLAAb/KXWgYlgu/2R2TZL2Ko0= github.com/aws/aws-sdk-go-v2/service/sso v1.28.2/go.mod h1:n9bTZFZcBa9hGGqVz3i/a6+NG0zmZgtkB9qVVFDqPA8= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.20.0/go.mod h1:dWqm5G767qwKPuayKfzm4rjzFmVjiBFbOJrpSPnAMDs= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2 h1:pd9G9HQaM6UZAZh19pYOkpKSQkyQQ9ftnl/LttQOcGI= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2/go.mod 
h1:eknndR9rU8UpE/OmFpqU78V1EcXPKFTTm5l/buZYgvM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.0 h1:Bnr+fXrlrPEoR1MAFrHVsge3M/WoK4n23VNhRM7TPHI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.34.0/go.mod h1:eknndR9rU8UpE/OmFpqU78V1EcXPKFTTm5l/buZYgvM= github.com/aws/aws-sdk-go-v2/service/sts v1.16.3/go.mod h1:bfBj0iVmsUyUg4weDB4NxktD9rDGeKSVWnjTnwbx9b8= github.com/aws/aws-sdk-go-v2/service/sts v1.25.3/go.mod h1:4EqRHDCKP78hq3zOnmFXu5k0j4bXbRFfCh/zQ6KnEfQ= github.com/aws/aws-sdk-go-v2/service/sts v1.38.0 h1:iV1Ko4Em/lkJIsoKyGfc0nQySi+v0Udxr6Igq+y9JZc= From 4c9799388c0386920fa2c058c5b66b8a9b0505bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 10:29:27 -0400 Subject: [PATCH 17/29] Bump google.golang.org/grpc from 1.74.2 to 1.75.0 in /sdks (#35971) Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.74.2 to 1.75.0. - [Release notes](https://github.com/grpc/grpc-go/releases) - [Commits](https://github.com/grpc/grpc-go/compare/v1.74.2...v1.75.0) --- updated-dependencies: - dependency-name: google.golang.org/grpc dependency-version: 1.75.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- sdks/go.mod | 16 ++++++++-------- sdks/go.sum | 36 ++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index 3e084a51bc2d..b3fac9865115 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -62,7 +62,7 @@ require ( golang.org/x/text v0.28.0 google.golang.org/api v0.248.0 google.golang.org/genproto v0.0.0-20250603155806-513f23925822 - google.golang.org/grpc v1.74.2 + google.golang.org/grpc v1.75.0 google.golang.org/protobuf v1.36.8 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 @@ -84,7 +84,7 @@ require ( dario.cat/mergo v1.0.1 // indirect filippo.io/edwards25519 v1.1.0 // indirect github.com/GoogleCloudPlatform/grpc-gcp-go/grpcgcp v1.5.3 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect github.com/apache/arrow/go/v15 v15.0.2 // indirect @@ -96,7 +96,7 @@ require ( github.com/distribution/reference v0.6.0 // indirect github.com/ebitengine/purego v0.8.4 // indirect github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect - github.com/go-jose/go-jose/v4 v4.0.5 // indirect + github.com/go-jose/go-jose/v4 v4.1.1 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect @@ -125,12 +125,12 @@ require ( go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect - 
go.opentelemetry.io/otel v1.36.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 // indirect - go.opentelemetry.io/otel/metric v1.36.0 // indirect - go.opentelemetry.io/otel/sdk v1.36.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect - go.opentelemetry.io/otel/trace v1.36.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/sdk v1.37.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect go.shabbyrobe.org/gocovmerge v0.0.0-20230507111327-fa4f82cfbf4d // indirect golang.org/x/time v0.12.0 // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index dc0560215933..87152805d8b5 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -705,8 +705,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/GoogleCloudPlatform/cloudsql-proxy v1.29.0/go.mod h1:spvB9eLJH9dutlbPSRmHvSXXHOwGRyeXh1jVdquA2G8= github.com/GoogleCloudPlatform/grpc-gcp-go/grpcgcp v1.5.3 h1:2afWGsMzkIcN8Qm4mgPJKZWyroE5QBszMiDMYEBrnfw= github.com/GoogleCloudPlatform/grpc-gcp-go/grpcgcp v1.5.3/go.mod h1:dppbR7CwXD4pgtV9t3wD1812RaLDcBjtblcDF5f1vI0= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 h1:ErKg/3iS1AKcTkf3yixlZ54f9U1rljCkQyEXWUnIUxc= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0/go.mod h1:yAZHSGnqScoU556rBOVkwLze6WP5N+U11RHuWaGVxwY= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 h1:UQUsRi8WTzhZntp5313l+CHIAT95ojUI2lpP/ExlZa4= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0/go.mod h1:Cz6ft6Dkn3Et6l2v2a9/RpN7epQ1GtDlO6lj8bEcOvw= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 h1:owcC2UnmsZycprQ5RfRgjydWhuoxg71LUfyiQdijZuM= 
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0/go.mod h1:ZPpqegjbE99EPKsu3iUWV22A04wzGPcAY/ziSIQEEgs= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0 h1:4LP6hvB4I5ouTbGgWtixJhgED6xdf67twf9PoY96Tbg= @@ -954,8 +954,8 @@ github.com/go-gorp/gorp v2.2.0+incompatible/go.mod h1:7IfkAQnO7jfT/9IQ3R9wL1dFhu github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= -github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE= -github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA= +github.com/go-jose/go-jose/v4 v4.1.1 h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRmkAI= +github.com/go-jose/go-jose/v4 v4.1.1/go.mod h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= @@ -1495,22 +1495,22 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.6 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= -go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= +go.opentelemetry.io/otel v1.37.0 
h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 h1:rixTyDGXFxRy1xzhKrotaHy3/KXdPhlWARrCgK+eqUY= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0/go.mod h1:dowW6UsM9MKbJq5JTz2AMVp3/5iW5I/TStsk8S+CfHw= -go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= -go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= -go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= -go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= -go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis= -go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4= -go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= -go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= +go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= +go.opentelemetry.io/otel/sdk/metric v1.37.0 
h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= +go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1981,8 +1981,8 @@ gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJ gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= -gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= -gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= @@ -2261,8 +2261,8 @@ google.golang.org/grpc v1.52.3/go.mod h1:pu6fVzoFb+NBYNAvQL08ic+lvB2IojljRYuun5v google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= -google.golang.org/grpc v1.74.2 
h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4= -google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= From 62cbf83ac8faa3371ab847ca9c1304861f901722 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 27 Aug 2025 10:50:04 -0400 Subject: [PATCH 18/29] Override localhost endpoint when a worker is running in docker on mac (#35964) --- .../runners/prism/internal/environments.go | 2 +- .../runners/prism/internal/jobservices/job.go | 2 +- .../runners/prism/internal/worker/worker.go | 35 ++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/sdks/go/pkg/beam/runners/prism/internal/environments.go b/sdks/go/pkg/beam/runners/prism/internal/environments.go index 3239c76dfe1f..971bb4f83cfa 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/environments.go +++ b/sdks/go/pkg/beam/runners/prism/internal/environments.go @@ -79,7 +79,7 @@ func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *wor logger.Error("unmarshaling docker environment payload", "error", err) return err } - return dockerEnvironment(ctx, logger, dp, wk, j.ArtifactEndpoint()) + return dockerEnvironment(ctx, logger, dp, wk, wk.ArtifactEndpoint) case urns.EnvProcess: pp := &pipepb.ProcessPayload{} if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), pp); err != nil { diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go index 
f186b11fd1d8..ae0e3e73e860 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -208,7 +208,7 @@ func (j *Job) MakeWorker(env string) *worker.W { wk.EnvPb = j.Pipeline.GetComponents().GetEnvironments()[env] wk.PipelineOptions = j.PipelineOptions() wk.JobKey = j.JobKey() - wk.ArtifactEndpoint = j.ArtifactEndpoint() + wk.ResolveEndpoints(j.ArtifactEndpoint()) return wk } diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go index b4133b0332a6..1141a5b02304 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go @@ -24,6 +24,9 @@ import ( "io" "log/slog" "net" + "os" + "runtime" + "strings" "sync" "sync/atomic" @@ -58,9 +61,9 @@ type W struct { ID, Env string - JobKey, ArtifactEndpoint string - EnvPb *pipepb.Environment - PipelineOptions *structpb.Struct + JobKey, ArtifactEndpoint, endpoint string + EnvPb *pipepb.Environment + PipelineOptions *structpb.Struct // These are the ID sources inst uint64 @@ -79,8 +82,32 @@ type controlResponder interface { Respond(*fnpb.InstructionResponse) } +// resolveEndpoint checks if the worker is running inside a docker container on mac or Windows and +// if the endpoint is a "localhost" endpoint. If so, overrides it with "host.docker.internal". +// Reference: https://docs.docker.com/desktop/features/networking/#networking-mode-and-dns-behaviour-for-mac-and-windows +func (wk *W) resolveEndpoint(endpoint string) string { + // The presence of an external environment does not guarantee execution within + // Docker, as Python's LOOPBACK also runs in an external environment. + // A specific check for the "BEAM_WORKER_POOL_IN_DOCKER_VM" environment variable is required to confirm + // if the worker is running inside a Docker container. 
+ // Python LOOPBACK mode: https://github.com/apache/beam/blob/0589b14812ec52bff9d20d3bfcd96da393b9ebdb/sdks/python/apache_beam/runners/portability/portable_runner.py#L397 + // External Environment: https://beam.apache.org/documentation/runtime/sdk-harness-config/ + + workerInDocker := wk.EnvPb.GetUrn() == urns.EnvDocker || + (wk.EnvPb.GetUrn() == urns.EnvExternal && (os.Getenv("BEAM_WORKER_POOL_IN_DOCKER_VM") == "1")) + if runtime.GOOS != "linux" && workerInDocker && strings.HasPrefix(endpoint, "localhost:") { + return "host.docker.internal:" + strings.TrimPrefix(endpoint, "localhost:") + } + return endpoint +} + +func (wk *W) ResolveEndpoints(artifactEndpoint string) { + wk.ArtifactEndpoint = wk.resolveEndpoint(artifactEndpoint) + wk.endpoint = wk.resolveEndpoint(wk.parentPool.endpoint) +} + func (wk *W) Endpoint() string { - return wk.parentPool.endpoint + return wk.endpoint } func (wk *W) String() string { From 2bcca4811b341cefb5eac6ba9375c110c5457d27 Mon Sep 17 00:00:00 2001 From: liferoad Date: Wed, 27 Aug 2025 12:27:32 -0400 Subject: [PATCH 19/29] fix(parquetio): handle missing nullable fields in row conversion (#35948) * fix(parquetio): handle missing nullable fields in row conversion Add null value handling when converting rows to Arrow tables for nullable fields that are missing from input data. This fixes KeyError when writing to Parquet with missing nullable fields, addressing issue #35791. * fix lint --- sdks/python/apache_beam/io/parquetio.py | 7 +- sdks/python/apache_beam/io/parquetio_test.py | 75 +++++++++++++++++++- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/io/parquetio.py b/sdks/python/apache_beam/io/parquetio.py index fa8b56f916dc..82ae9a50ace4 100644 --- a/sdks/python/apache_beam/io/parquetio.py +++ b/sdks/python/apache_beam/io/parquetio.py @@ -119,7 +119,12 @@ def process(self, row, w=DoFn.WindowParam, pane=DoFn.PaneInfoParam): # reorder the data in columnar format. 
for i, n in enumerate(self._schema.names): - self._buffer[i].append(row[n]) + # Handle missing nullable fields by using None as default value + field = self._schema.field(i) + if field.nullable and n not in row: + self._buffer[i].append(None) + else: + self._buffer[i].append(row[n]) def finish_bundle(self): if len(self._buffer[0]) > 0: diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py index 9371705a1fa3..78d1db4cc7c2 100644 --- a/sdks/python/apache_beam/io/parquetio_test.py +++ b/sdks/python/apache_beam/io/parquetio_test.py @@ -59,12 +59,11 @@ try: import pyarrow as pa import pyarrow.parquet as pq + ARROW_MAJOR_VERSION, _, _ = map(int, pa.__version__.split('.')) except ImportError: pa = None - pl = None pq = None - -ARROW_MAJOR_VERSION, _, _ = map(int, pa.__version__.split('.')) + ARROW_MAJOR_VERSION = 0 @unittest.skipIf(pa is None, "PyArrow is not installed.") @@ -422,6 +421,76 @@ def test_schema_read_write(self): | Map(stable_repr)) assert_that(readback, equal_to([stable_repr(r) for r in rows])) + def test_write_with_nullable_fields_missing_data(self): + """Test WriteToParquet with nullable fields where some fields are missing. + + This test addresses the bug reported in: + https://github.com/apache/beam/issues/35791 + where WriteToParquet fails with a KeyError if any nullable + field is missing in the data. 
+ """ + # Define PyArrow schema with all fields nullable + schema = pa.schema([ + pa.field("id", pa.int64(), nullable=True), + pa.field("name", pa.string(), nullable=True), + pa.field("age", pa.int64(), nullable=True), + pa.field("email", pa.string(), nullable=True), + ]) + + # Sample data with missing nullable fields + data = [ + { + 'id': 1, 'name': 'Alice', 'age': 30 + }, # missing 'email' + { + 'id': 2, 'name': 'Bob', 'age': 25, 'email': 'bob@example.com' + }, # all fields present + { + 'id': 3, 'name': 'Charlie', 'age': None, 'email': None + }, # explicit None values + { + 'id': 4, 'name': 'David' + }, # missing 'age' and 'email' + ] + + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, 'nullable_test') + + # Write data with missing nullable fields - this should not raise KeyError + with TestPipeline() as p: + _ = ( + p + | Create(data) + | WriteToParquet( + path, schema, num_shards=1, shard_name_template='')) + + # Read back and verify the data + with TestPipeline() as p: + readback = ( + p + | ReadFromParquet(path + '*') + | Map(json.dumps, sort_keys=True)) + + # Expected data should have None for missing nullable fields + expected_data = [ + { + 'id': 1, 'name': 'Alice', 'age': 30, 'email': None + }, + { + 'id': 2, 'name': 'Bob', 'age': 25, 'email': 'bob@example.com' + }, + { + 'id': 3, 'name': 'Charlie', 'age': None, 'email': None + }, + { + 'id': 4, 'name': 'David', 'age': None, 'email': None + }, + ] + + assert_that( + readback, + equal_to([json.dumps(r, sort_keys=True) for r in expected_data])) + def test_batched_read(self): with TemporaryDirectory() as tmp_dirname: path = os.path.join(tmp_dirname + "tmp_filename") From 061191ff66e09937152e5b11b0a907bcfbeab5dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 13:55:27 -0400 Subject: [PATCH 20/29] Bump cloud.google.com/go/storage from 1.56.0 to 1.56.1 in /sdks (#35980) Bumps 
[cloud.google.com/go/storage](https://github.com/googleapis/google-cloud-go) from 1.56.0 to 1.56.1. - [Release notes](https://github.com/googleapis/google-cloud-go/releases) - [Changelog](https://github.com/googleapis/google-cloud-go/blob/main/CHANGES.md) - [Commits](https://github.com/googleapis/google-cloud-go/compare/spanner/v1.56.0...storage/v1.56.1) --- updated-dependencies: - dependency-name: cloud.google.com/go/storage dependency-version: 1.56.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- sdks/go.mod | 6 +++--- sdks/go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index b3fac9865115..61431674adee 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -31,7 +31,7 @@ require ( cloud.google.com/go/profiler v0.4.3 cloud.google.com/go/pubsub v1.50.0 cloud.google.com/go/spanner v1.83.0 - cloud.google.com/go/storage v1.56.0 + cloud.google.com/go/storage v1.56.1 github.com/aws/aws-sdk-go-v2 v1.38.1 github.com/aws/aws-sdk-go-v2/config v1.31.2 github.com/aws/aws-sdk-go-v2/credentials v1.18.7 @@ -136,7 +136,7 @@ require ( ) require ( - cloud.google.com/go v0.121.4 // indirect + cloud.google.com/go v0.121.6 // indirect cloud.google.com/go/compute/metadata v0.8.0 // indirect cloud.google.com/go/iam v1.5.2 // indirect cloud.google.com/go/longrunning v0.6.7 // indirect @@ -205,6 +205,6 @@ require ( golang.org/x/mod v0.26.0 // indirect golang.org/x/tools v0.35.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index 
87152805d8b5..ec9c44efe0fa 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -40,8 +40,8 @@ cloud.google.com/go v0.104.0/go.mod h1:OO6xxXdJyvuJPcEPBLN9BJPD+jep5G1+2U5B5gkRY cloud.google.com/go v0.105.0/go.mod h1:PrLgOJNe5nfE9UMxKxgXj4mD3voiP+YQ6gdt6KMFOKM= cloud.google.com/go v0.107.0/go.mod h1:wpc2eNrD7hXUTy8EKS10jkxpZBjASrORK7goS+3YX2I= cloud.google.com/go v0.110.0/go.mod h1:SJnCLqQ0FCFGSZMUNUf84MV3Aia54kn7pi8st7tMzaY= -cloud.google.com/go v0.121.4 h1:cVvUiY0sX0xwyxPwdSU2KsF9knOVmtRyAMt8xou0iTs= -cloud.google.com/go v0.121.4/go.mod h1:XEBchUiHFJbz4lKBZwYBDHV/rSyfFktk737TLDU089s= +cloud.google.com/go v0.121.6 h1:waZiuajrI28iAf40cWgycWNgaXPO06dupuS+sgibK6c= +cloud.google.com/go v0.121.6/go.mod h1:coChdst4Ea5vUpiALcYKXEpR1S9ZgXbhEzzMcMR66vI= cloud.google.com/go/accessapproval v1.4.0/go.mod h1:zybIuC3KpDOvotz59lFe5qxRZx6C75OtwbisN56xYB4= cloud.google.com/go/accessapproval v1.5.0/go.mod h1:HFy3tuiGvMdcd/u+Cu5b9NkO1pEICJ46IR82PoUdplw= cloud.google.com/go/accessapproval v1.6.0/go.mod h1:R0EiYnwV5fsRFiKZkPHr6mwyk2wxUJ30nL4j2pcFY2E= @@ -575,8 +575,8 @@ cloud.google.com/go/storage v1.23.0/go.mod h1:vOEEDNFnciUMhBeT6hsJIn3ieU5cFRmzeL cloud.google.com/go/storage v1.27.0/go.mod h1:x9DOL8TK/ygDUMieqwfhdpQryTeEkhGKMi80i/iqR2s= cloud.google.com/go/storage v1.28.1/go.mod h1:Qnisd4CqDdo6BGs2AD5LLnEsmSQ80wQ5ogcBBKhU86Y= cloud.google.com/go/storage v1.29.0/go.mod h1:4puEjyTKnku6gfKoTfNOU/W+a9JyuVNxjpS5GBrB8h4= -cloud.google.com/go/storage v1.56.0 h1:iixmq2Fse2tqxMbWhLWC9HfBj1qdxqAmiK8/eqtsLxI= -cloud.google.com/go/storage v1.56.0/go.mod h1:Tpuj6t4NweCLzlNbw9Z9iwxEkrSem20AetIeH/shgVU= +cloud.google.com/go/storage v1.56.1 h1:n6gy+yLnHn0hTwBFzNn8zJ1kqWfR91wzdM8hjRF4wP0= +cloud.google.com/go/storage v1.56.1/go.mod h1:C9xuCZgFl3buo2HZU/1FncgvvOgTAs/rnh4gF4lMg0s= cloud.google.com/go/storagetransfer v1.5.0/go.mod h1:dxNzUopWy7RQevYFHewchb29POFv3/AaBgnhqzqiK0w= cloud.google.com/go/storagetransfer v1.6.0/go.mod h1:y77xm4CQV/ZhFZH75PLEXY0ROiS7Gh6pSKrM8dJyg6I= cloud.google.com/go/storagetransfer 
v1.7.0/go.mod h1:8Giuj1QNb1kfLAiWM1bN6dHzfdlDAVC9rv9abHot2W4= @@ -2215,8 +2215,8 @@ google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOl google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= -google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074 h1:mVXdvnmR3S3BQOqHECm9NGMjYiRtEvDYcqAqedTXY6s= -google.golang.org/genproto/googleapis/api v0.0.0-20250721164621-a45f3dfb1074/go.mod h1:vYFwMYFbmA8vl6Z/krj/h7+U/AqpHknwJX4Uqgfyc7I= +google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c h1:AtEkQdl5b6zsybXcbz00j1LwNodDuH6hVifIaNqk7NQ= +google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c/go.mod h1:ea2MjsO70ssTfCjiwHgI0ZFqcw45Ksuk2ckf9G468GA= google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c h1:qXWI/sQtv5UKboZ/zUk7h+mrf/lXORyI+n9DKDAusdg= google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= From a7e2ac3db07f6d7613ee07eb3838eb11ada1785a Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Wed, 27 Aug 2025 14:09:21 -0400 Subject: [PATCH 21/29] [Prism] Fix segv when docker container self-terminated. (#35977) * Fix segv when docker container is self-terminated * Add some debug logging for docker and process env. 
--- .../runners/prism/internal/environments.go | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sdks/go/pkg/beam/runners/prism/internal/environments.go b/sdks/go/pkg/beam/runners/prism/internal/environments.go index 971bb4f83cfa..d18cc3b83732 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/environments.go +++ b/sdks/go/pkg/beam/runners/prism/internal/environments.go @@ -87,7 +87,7 @@ func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *wor return err } go func() { - processEnvironment(ctx, pp, wk) + processEnvironment(ctx, logger, pp, wk) logger.Debug("environment stopped", slog.String("job", j.String())) }() return nil @@ -207,17 +207,18 @@ func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.Dock } logger.Debug("creating container", "envs", envs, "mounts", mounts) + cmd := []string{ + fmt.Sprintf("--id=%v", wk.ID), + fmt.Sprintf("--control_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--artifact_endpoint=%v", artifactEndpoint), + fmt.Sprintf("--provision_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--logging_endpoint=%v", wk.Endpoint()), + } ccr, err := cli.ContainerCreate(ctx, &container.Config{ Image: dp.GetContainerImage(), - Cmd: []string{ - fmt.Sprintf("--id=%v", wk.ID), - fmt.Sprintf("--control_endpoint=%v", wk.Endpoint()), - fmt.Sprintf("--artifact_endpoint=%v", artifactEndpoint), - fmt.Sprintf("--provision_endpoint=%v", wk.Endpoint()), - fmt.Sprintf("--logging_endpoint=%v", wk.Endpoint()), - }, - Env: envs, - Tty: false, + Cmd: cmd, + Env: envs, + Tty: false, }, &container.HostConfig{ NetworkMode: "host", Mounts: mounts, @@ -236,6 +237,7 @@ func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.Dock } logger.Debug("container started") + logger.Debug("container start command", "cmd", cmd) // Start goroutine to wait on container state. 
go func() { @@ -273,6 +275,7 @@ func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.Dock rc, err := cli.ContainerLogs(bgctx, containerID, container.LogsOptions{Details: true, ShowStdout: true, ShowStderr: true}) if err != nil { logger.Error("docker container logs error", "error", err) + return } defer rc.Close() var buf bytes.Buffer @@ -284,8 +287,9 @@ func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.Dock return nil } -func processEnvironment(ctx context.Context, pp *pipepb.ProcessPayload, wk *worker.W) { - cmd := exec.CommandContext(ctx, pp.GetCommand(), "--id="+wk.ID, "--provision_endpoint="+wk.Endpoint()) +func processEnvironment(ctx context.Context, logger *slog.Logger, pp *pipepb.ProcessPayload, wk *worker.W) { + cmd := exec.CommandContext(ctx, pp.GetCommand(), "--id='"+wk.ID+"'", "--provision_endpoint="+wk.Endpoint()) + logger.Debug("starting process", "cmd", cmd.String()) cmd.WaitDelay = time.Millisecond * 100 cmd.Stderr = os.Stderr @@ -296,9 +300,12 @@ func processEnvironment(ctx context.Context, pp *pipepb.ProcessPayload, wk *work cmd.Env = append(cmd.Environ(), fmt.Sprintf("%v=%v", k, v)) } if err := cmd.Start(); err != nil { + logger.Error("process failed to start", "error", err) return } // Job processing happens here, but orchestrated by other goroutines // This call blocks until the context is cancelled, or the command exits. 
- cmd.Wait() + if err := cmd.Wait(); err != nil { + logger.Error("process failed while running", "error", err) + } } From 09beeaadd148009e312df796e8513fd882aba51e Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 27 Aug 2025 14:27:36 -0400 Subject: [PATCH 22/29] add a jinja % include/import pipeline example to docs (#35931) * add a jinja include pipeline example * update yaml doc with import example * address gemini and other comments * fix table of contents for readme * add link to jinja pipeline examples --- .../apache_beam/yaml/examples/README.md | 1 + .../content/en/documentation/sdks/yaml.md | 76 ++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/examples/README.md b/sdks/python/apache_beam/yaml/examples/README.md index 8c6356f68a38..75098d212cd9 100644 --- a/sdks/python/apache_beam/yaml/examples/README.md +++ b/sdks/python/apache_beam/yaml/examples/README.md @@ -28,6 +28,7 @@ * [Blueprints](#blueprints) * [Element-wise](#element-wise) * [IO](#io) + * [Jinja](#jinja) * [ML](#ml) diff --git a/website/www/site/content/en/documentation/sdks/yaml.md b/website/www/site/content/en/documentation/sdks/yaml.md index 73d1eebaae95..33fad5b25506 100644 --- a/website/www/site/content/en/documentation/sdks/yaml.md +++ b/website/www/site/content/en/documentation/sdks/yaml.md @@ -708,7 +708,7 @@ the yaml file can be parameterized with externally provided variables using the [jinja variable syntax](https://jinja.palletsprojects.com/en/stable/templates/#variables). The values are then passed via a `--jinja_variables` command line flag. -For example, one could start a pipeline with +For example, one could start a pipeline with: ``` pipeline: @@ -742,6 +742,80 @@ or writing dated sources and sinks, e.g. would write to files like `gs://path/to/2016/08/04/dated-output*.json`. 
+A user can also use the `% include` directive to pull in other common templates: + +/pipeline.yaml +```yaml +pipeline: + transforms: + - name: Read from GCS + type: ReadFromText + config: +# NOTE: For include, the indentation has to line up correctly for it to be +# parsed correctly. So in this example the included readFromText.yaml has +# already indented yaml lines to line up correctly when including into this +# pipeline here. +{% include '/submodules/readFromText.yaml' %} + - name: Write to GCS + type: WriteToText + input: Read from GCS + config: + path: "gs://MY-BUCKET/wordCounts/" +``` + +/submodules/readFromText.yaml +```yaml + path: {{readFromText.path}} +``` + +This pipeline can be run like this: + +```sh +python -m apache_beam.yaml.main \ + --yaml_pipeline_file=pipeline.yaml \ + --jinja_variables='{"readFromText": {"path": "gs://dataflow-samples/shakespeare/kinglear.txt"}}' +``` + +The `% import` jinja directive can also be used to pull in macros: + +/pipeline.yaml +```yaml +{% import '/macros.yaml' as macros %} + +pipeline: + type: chain + transforms: + +# Read in text file +{{ macros.readFromText(readFromText) | indent(4, true) }} + +# Write to text file on GCS, locally, etc + - name: Write to GCS + type: WriteToText + input: Read from GCS + config: + path: "gs://MY-BUCKET/wordCounts/" +``` + +/macros.yaml +```yaml +{%- macro readFromText(params) -%} +- name: Read from GCS + type: ReadFromText + config: + path: "{{ params.path }}" +{%- endmacro -%} +``` + +This pipeline can be run with the same command as in the `% include` example +above. + +There are many more ways to import and even use template inheritance using +Jinja as seen [here](https://jinja.palletsprojects.com/en/stable/templates/#import) +and [here](https://jinja.palletsprojects.com/en/stable/templates/#inheritance). + +Full jinja pipeline examples can be found [here](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/yaml/examples/transforms/jinja). 
+ ## Other Resources * [Example pipeline](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/yaml/examples) From 45492145ae58e0b8a65a64f92aa2061d272d8b21 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 14:49:33 -0400 Subject: [PATCH 23/29] Bump github.com/aws/aws-sdk-go-v2/config from 1.31.2 to 1.31.3 in /sdks (#35983) --- sdks/go.mod | 2 +- sdks/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/go.mod b/sdks/go.mod index 61431674adee..7aef3b6ca0c1 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -33,7 +33,7 @@ require ( cloud.google.com/go/spanner v1.83.0 cloud.google.com/go/storage v1.56.1 github.com/aws/aws-sdk-go-v2 v1.38.1 - github.com/aws/aws-sdk-go-v2/config v1.31.2 + github.com/aws/aws-sdk-go-v2/config v1.31.3 github.com/aws/aws-sdk-go-v2/credentials v1.18.7 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.18.5 github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0 diff --git a/sdks/go.sum b/sdks/go.sum index ec9c44efe0fa..76aa0b7a5af3 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -757,8 +757,8 @@ github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 h1:6GMWV6CNpA/6fbFH github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0/go.mod h1:/mXlTIVG9jbxkqDnr5UQNQxW1HRYxeGklkM9vAFeabg= github.com/aws/aws-sdk-go-v2/config v1.15.3/go.mod h1:9YL3v07Xc/ohTsxFXzan9ZpFpdTOFl4X65BAKYaz8jg= github.com/aws/aws-sdk-go-v2/config v1.25.3/go.mod h1:tAByZy03nH5jcq0vZmkcVoo6tRzRHEwSFx3QW4NmDw8= -github.com/aws/aws-sdk-go-v2/config v1.31.2 h1:NOaSZpVGEH2Np/c1toSeW0jooNl+9ALmsUTZ8YvkJR0= -github.com/aws/aws-sdk-go-v2/config v1.31.2/go.mod h1:17ft42Yb2lF6OigqSYiDAiUcX4RIkEMY6XxEMJsrAes= +github.com/aws/aws-sdk-go-v2/config v1.31.3 h1:RIb3yr/+PZ18YYNe6MDiG/3jVoJrPmdoCARwNkMGvco= +github.com/aws/aws-sdk-go-v2/config v1.31.3/go.mod h1:jjgx1n7x0FAKl6TnakqrpkHWWKcX3xfWtdnIJs5K9CE= github.com/aws/aws-sdk-go-v2/credentials v1.11.2/go.mod 
h1:j8YsY9TXTm31k4eFhspiQicfXPLZ0gYXA50i4gxPE8g= github.com/aws/aws-sdk-go-v2/credentials v1.16.2/go.mod h1:sDdvGhXrSVT5yzBDR7qXz+rhbpiMpUYfF3vJ01QSdrc= github.com/aws/aws-sdk-go-v2/credentials v1.18.7 h1:zqg4OMrKj+t5HlswDApgvAHjxKtlduKS7KicXB+7RLg= From 8c6ff9a21a16c015185856f261655f272ee440e1 Mon Sep 17 00:00:00 2001 From: Enrique Calderon <71863693+ksobrenat32@users.noreply.github.com> Date: Wed, 27 Aug 2025 12:56:49 -0600 Subject: [PATCH 24/29] Add a security GCP log analyzer (#35922) * Add the base log_analyzer * Add github action for security logging * Enhance LogAnalyzer to filter logs by time range and include file names in event summary * Add dry-run option for weekly email report generation in LogAnalyzer * Better error handling for timezones and missing details * Refactor LogAnalyzer to use SinkCls for type consistency and enhance bucket permission management for log sinks --- .../beam_Infrastructure_SecurityLogging.yml | 77 ++++ infra/security/README.md | 84 +++++ infra/security/config.yml | 43 +++ infra/security/log_analyzer.py | 333 ++++++++++++++++++ infra/security/requirements.txt | 19 + 5 files changed, 556 insertions(+) create mode 100644 .github/workflows/beam_Infrastructure_SecurityLogging.yml create mode 100644 infra/security/README.md create mode 100644 infra/security/config.yml create mode 100644 infra/security/log_analyzer.py create mode 100644 infra/security/requirements.txt diff --git a/.github/workflows/beam_Infrastructure_SecurityLogging.yml b/.github/workflows/beam_Infrastructure_SecurityLogging.yml new file mode 100644 index 000000000000..c364056f5683 --- /dev/null +++ b/.github/workflows/beam_Infrastructure_SecurityLogging.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This workflow works with the GCP security log analyzer to +# generate weekly security reports and initialize log sinks + +name: GCP Security Log Analyzer + +on: + workflow_dispatch: + schedule: + # Once a week at 9:00 AM on Monday + - cron: '0 9 * * 1' + push: + paths: + - 'infra/security/config.yml' + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.sender.login }}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + contents: read + +jobs: + beam_GCP_Security_LogAnalyzer: + name: GCP Security Log Analysis + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.13' + + - name: Install Python dependencies + working-directory: ./infra/security + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Setup gcloud + uses: google-github-actions/setup-gcloud@v2 + + - name: Initialize Log Sinks + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' + working-directory: ./infra/security + run: python 
log_analyzer.py --config config.yml initialize + + - name: Generate Weekly Security Report + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + working-directory: ./infra/security + env: + SMTP_SERVER: smtp.gmail.com + SMTP_PORT: 465 + EMAIL_ADDRESS: ${{ secrets.ISSUE_REPORT_SENDER_EMAIL_ADDRESS }} + EMAIL_PASSWORD: ${{ secrets.ISSUE_REPORT_SENDER_EMAIL_PASSWORD }} + EMAIL_RECIPIENT: "dev@beam.apache.org" + run: python log_analyzer.py --config config.yml generate-report --dry-run diff --git a/infra/security/README.md b/infra/security/README.md new file mode 100644 index 000000000000..0e60c4b33043 --- /dev/null +++ b/infra/security/README.md @@ -0,0 +1,84 @@ + + +# GCP Security Analyzer + +This document describes the implementation of a security analyzer for Google Cloud Platform (GCP) resources. The analyzer is designed to enhance security monitoring within our GCP environment by capturing critical events and generating alerts for specific security-sensitive actions. + +## How It Works + +1. **Log Sinks**: The system uses [GCP Log Sinks](https://cloud.google.com/logging/docs/export/configure_export_v2) to capture specific security-related log entries. These sinks are configured to filter for events like IAM policy changes or service account key creation. +2. **Log Storage**: The filtered logs are routed to a dedicated Google Cloud Storage (GCS) bucket for persistence and analysis. +3. **Report Generation**: A scheduled job runs weekly, executing the `log_analyzer.py` script. +4. **Email Alerts**: The script analyzes the logs from the past week, compiles a summary of security events, and sends a report to a configured email address. + +## Configuration + +The behavior of the log analyzer is controlled by a `config.yml` file. Here’s an overview of the configuration options: + +- `project_id`: The GCP project ID where the resources are located. +- `bucket_name`: The name of the GCS bucket where logs will be stored. 
+- `logging`: Configures the logging level and format for the script. +- `sinks`: A list of log sinks to be created. Each sink has the following properties: + - `name`: A unique name for the sink. + - `description`: A brief description of what the sink monitors. + - `filter_methods`: A list of GCP API methods to include in the filter (e.g., `SetIamPolicy`). + - `excluded_principals`: A list of service accounts or user emails to exclude from monitoring, such as CI/CD service accounts. + +### Example Configuration (`config.yml`) + +```yaml +project_id: your-gcp-project-id +bucket_name: your-log-storage-bucket + +sinks: + - name: iam-policy-changes + description: Monitors changes to IAM policies. + filter_methods: + - "SetIamPolicy" + excluded_principals: + - "ci-cd-account@your-project.iam.gserviceaccount.com" +``` + +## Usage + +The `log_analyzer.py` script provides two main commands for managing the security analyzer. + +### Initializing Sinks + +To create or update the log sinks in GCP based on your `config.yml` file, run the following command: + +```bash +python log_analyzer.py --config config.yml initialize +``` + +This command ensures that the log sinks are correctly configured to capture the desired security events. + +### Generating Weekly Reports + +To generate and send the weekly security report, run this command: + +```bash +python log_analyzer.py --config config.yml generate-report +``` + +This is typically run as a scheduled job (GitHub Action) to automate the delivery of weekly security reports. + + + diff --git a/infra/security/config.yml b/infra/security/config.yml new file mode 100644 index 000000000000..9565623be16d --- /dev/null +++ b/infra/security/config.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project_id: testing-me-460223 + +# Logging +logging: + level: DEBUG + format: "[%(asctime)s] %(levelname)s: %(message)s" + +# gcloud storage bucket +bucket_name: "testing-me-460223-tfstate" + +# GCP Log sinks +sinks: + - name: iam-policy-changes + description: Monitors changes to IAM policies, excluding approved CI/CD service accounts. + filter_methods: + - "SetIamPolicy" + excluded_principals: + - beam-github-actions@apache-beam-testing.iam.gserviceaccount.com + - github-self-hosted-runners@apache-beam-testing.iam.gserviceaccount.com + + - name: sa-key-management + description: Monitors creation and deletion of service account keys. + filter_methods: + - "google.iam.admin.v1.IAM.CreateServiceAccountKey" + - "google.iam.admin.v1.IAM.DeleteServiceAccountKey" + excluded_principals: + - beam-github-actions@apache-beam-testing.iam.gserviceaccount.com + - github-self-hosted-runners@apache-beam-testing.iam.gserviceaccount.com diff --git a/infra/security/log_analyzer.py b/infra/security/log_analyzer.py new file mode 100644 index 000000000000..55ab4495e24f --- /dev/null +++ b/infra/security/log_analyzer.py @@ -0,0 +1,333 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import ssl +import yaml +import logging +import smtplib +import os +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from google.cloud import logging_v2 +from google.cloud import storage +from typing import List, Dict, Any +import argparse + +REPORT_SUBJECT = "Weekly IAM Security Events Report" +REPORT_BODY_TEMPLATE = """ +Hello Team, + +Please find below the summary of IAM security events for the past week: + +{event_summary} + +Best Regards, +Automated GitHub Action +""" + +@dataclass +class SinkCls: + name: str + description: str + filter_methods: List[str] + excluded_principals: List[str] + +class LogAnalyzer(): + def __init__(self, project_id: str, gcp_bucket: str, logger: logging.Logger, sinks: List[SinkCls]): + self.project_id = project_id + self.bucket = gcp_bucket + self.logger = logger + self.sinks = sinks + + def _construct_filter(self, sink: SinkCls) -> str: + """ + Constructs a filter string for a given sink. + + Args: + sink (Sink): The sink object containing filter information. + + Returns: + str: The constructed filter string. 
+ """ + + method_filters = [] + for method in sink.filter_methods: + method_filters.append(f'protoPayload.methodName="{method}"') + + exclusion_filters = [] + for principal in sink.excluded_principals: + exclusion_filters.append(f'protoPayload.authenticationInfo.principalEmail != "{principal}"') + + if method_filters and exclusion_filters: + filter_ = f"({' OR '.join(method_filters)}) AND ({' AND '.join(exclusion_filters)})" + elif method_filters: + filter_ = f"({' OR '.join(method_filters)})" + elif exclusion_filters: + filter_ = f"({' AND '.join(exclusion_filters)})" + else: + filter_ = "" + + return filter_ + + def _create_log_sink(self, sink: SinkCls) -> None: + """ + Creates a log sink in GCP if it doesn't already exist. + If it already exists, it updates the sink with the new filter in case the filter has changed. + + Args: + sink (Sink): The sink object to create. + """ + logging_client = logging_v2.Client(project=self.project_id) + filter_ = self._construct_filter(sink) + destination = "storage.googleapis.com/{bucket}".format(bucket=self.bucket) + + sink_client = logging_client.sink(sink.name, filter_=filter_, destination=destination) + + if sink_client.exists(): + self.logger.debug(f"Sink {sink.name} already exists.") + sink_client.reload() + if sink_client.filter_ != filter_: + sink_client.filter_ = filter_ + sink_client.update() + self.logger.info(f"Updated sink {sink.name}'s filter.") + else: + sink_client.create() + self.logger.info(f"Created sink {sink.name}.") + # Reload the sink to get the writer_identity, this may take a few moments + sink_client.reload() + + self._grant_bucket_permissions(sink_client) + + logging_client.close() + + def _grant_bucket_permissions(self, sink: logging_v2.Sink) -> None: + """ + Grants a log sink's writer identity permissions to write to the bucket. 
+ """ + logging_client = logging_v2.Client(project=self.project_id) + storage_client = storage.Client(project=self.project_id) + + sink.reload() + writer_identity = sink.writer_identity + if not writer_identity: + self.logger.warning(f"Could not retrieve writer identity for sink {sink.name}. " + f"Manual permission granting might be required.") + return + + bucket = storage_client.get_bucket(self.bucket) + policy = bucket.get_iam_policy(requested_policy_version=3) + iam_role = "roles/storage.objectCreator" + + # Workaround for projects where the writer_identity is not a valid service account. + if writer_identity == "serviceAccount:cloud-logs@system.gserviceaccount.com": + member = "group:cloud-logs@google.com" + else: + member = f"serviceAccount:{writer_identity}" + + # Check if the policy is already configured + if any(member in b.get("members", []) and b.get("role") == iam_role for b in policy.bindings): + self.logger.debug(f"Sink {sink.name} already has the necessary permissions.") + return + + policy.bindings.append({ + "role": iam_role, + "members": {member} + }) + + bucket.set_iam_policy(policy) + self.logger.info(f"Granted {iam_role} to {member} on bucket {self.bucket} for sink {sink.name}.") + + def initialize_sinks(self) -> None: + for sink in self.sinks: + self._create_log_sink(sink) + self.logger.info(f"Initialized sink: {sink.name}") + + def get_event_logs(self, days: int = 7) -> List[Dict[str, Any]]: + """ + Reads and retrieves log events from the specified time range from the GCP Cloud Storage bucket. + + Args: + days (int): The number of days to look back for log analysis. + + Returns: + List[Dict[str, Any]]: A list of log entries that match the specified time range. 
+ """ + found_events = [] + storage_client = storage.Client(project=self.project_id) + + now = datetime.now(timezone.utc) + end_time = now.replace(minute=0, second=0, microsecond=0) - timedelta(minutes=30) + start_time = end_time - timedelta(days=days) + + blobs = storage_client.list_blobs(self.bucket) + for blob in blobs: + if not (start_time <= blob.time_created < end_time): + continue + + self.logger.debug(f"Processing blob: {blob.name}") + content = blob.download_as_string().decode("utf-8") + + for num, line in enumerate(content.splitlines(), 1): + try: + log_entry = json.loads(line) + payload = log_entry.get("protoPayload") + if not payload: + self.logger.warning(f"Skipping log in blob {blob.name}, line {num}: no protoPayload found.") + continue + + event_details = { + "timestamp": log_entry.get("timestamp", "N/A"), + "principal": payload.get("authenticationInfo", {}).get("principalEmail", "N/A"), + "method": payload.get("methodName", "N/A"), + "resource": payload.get("resourceName", "N/A"), + "project_id": log_entry.get("resource", {}).get("labels", {}).get("project_id", "N/A"), + "file_name": blob.name + } + found_events.append(event_details) + except json.JSONDecodeError: + self.logger.warning(f"Skipping invalid JSON log in blob {blob.name}, line {num}.") + continue + + storage_client.close() + return found_events + + def create_weekly_email_report(self, dry_run: bool = False) -> None: + """ + Creates an email report based on the events found this week. + If `dry_run` is True, it will print the report to the console instead of sending it. 
+ """ + events = self.get_event_logs(days=7) + if not events: + self.logger.info("No events found for the weekly report.") + return + + events.sort(key=lambda x: x['timestamp'], reverse=True) + event_summary = "\n".join( + f"Timestamp: {event['timestamp']}, Principal: {event['principal']}, Method: {event['method']}, Resource: {event['resource']}, Project ID: {event['project_id']}, File: {event['file_name']}" + for event in events + ) + + report_subject = REPORT_SUBJECT + report_body = REPORT_BODY_TEMPLATE.format(event_summary=event_summary) + + if dry_run: + self.logger.info("Dry run: printing email report to console.") + print(f"Subject: {report_subject}\n") + print(f"Body:\n{report_body}") + return + + self.send_email(report_subject, report_body) + + def send_email(self, subject: str, body: str) -> None: + """ + Sends an email with the specified subject and body. + If email configuration is not fully set, it prints the email instead. + + Args: + subject (str): The subject of the email. + body (str): The body of the email. + """ + smtp_server = os.getenv("SMTP_SERVER") + smtp_port_str = os.getenv("SMTP_PORT") + recipient = os.getenv("EMAIL_RECIPIENT") + email = os.getenv("EMAIL_ADDRESS") + password = os.getenv("EMAIL_PASSWORD") + + if not all([smtp_server, smtp_port_str, recipient, email, password]): + self.logger.warning("Email configuration is not fully set. 
Printing email instead.") + print(f"Subject: {subject}\n") + print(f"Body:\n{body}") + return + + assert smtp_server is not None + assert smtp_port_str is not None + assert recipient is not None + assert email is not None + assert password is not None + + message = f"Subject: {subject}\n\n{body}" + context = ssl.create_default_context() + + try: + smtp_port = int(smtp_port_str) + with smtplib.SMTP_SSL(smtp_server, smtp_port, context=context) as server: + server.login(email, password) + server.sendmail(email, recipient, message) + self.logger.info(f"Successfully sent email report to {recipient}") + except Exception as e: + self.logger.error(f"Failed to send email report: {e}") + +def load_config_from_yaml(config_path: str) -> Dict[str, Any]: + with open(config_path, 'r') as file: + config = yaml.safe_load(file) + + c = { + "project_id": config.get("project_id"), + "gcp_bucket": config.get("bucket_name"), + "sinks": [], + "logger": logging.getLogger(__name__) + } + + for sink_config in config.get("sinks", []): + sink = SinkCls( + name=sink_config["name"], + description=sink_config["description"], + filter_methods=sink_config.get("filter_methods", []), + excluded_principals=sink_config.get("excluded_principals", []) + ) + c["sinks"].append(sink) + + logging_config = config.get("logging", {}) + log_level = logging_config.get("level", "INFO") + log_format = logging_config.get("format", "[%(asctime)s] %(levelname)s: %(message)s") + + c["logger"].setLevel(log_level) + logging.basicConfig(level=log_level, format=log_format) + + return c + +def main(): + """ + Main entry point for the script. 
+ """ + parser = argparse.ArgumentParser(description="GCP IAM Log Analyzer") + parser.add_argument("--config", required=True, help="Path to the configuration YAML file.") + + subparsers = parser.add_subparsers(dest="command", required=True) + + subparsers.add_parser("initialize", help="Initialize/update log sinks in GCP.") + report_parser = subparsers.add_parser("generate-report", help="Generate and send the weekly IAM security report.") + report_parser.add_argument("--dry-run", action="store_true", help="Do not send email, print report to console.") + + args = parser.parse_args() + + config = load_config_from_yaml(args.config) + log_analyzer = LogAnalyzer( + project_id=config["project_id"], + gcp_bucket=config["gcp_bucket"], + logger=config["logger"], + sinks=config["sinks"] + ) + + if args.command == "initialize": + log_analyzer.initialize_sinks() + log_analyzer.logger.info("Sinks initialized successfully.") + elif args.command == "generate-report": + log_analyzer.create_weekly_email_report(dry_run=args.dry_run) + log_analyzer.logger.info("Weekly report generation process completed.") + +if __name__ == "__main__": + main() diff --git a/infra/security/requirements.txt b/infra/security/requirements.txt new file mode 100644 index 000000000000..a4abb8bc5acf --- /dev/null +++ b/infra/security/requirements.txt @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PyYAML==6.0.2 +google-cloud-storage==3.3.0 +google-cloud-logging==3.12.1 From a7ec1ae5332ef49643cfe1bf908c2c104b51ba13 Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud <65791736+ahmedabu98@users.noreply.github.com> Date: Wed, 27 Aug 2025 15:14:19 -0400 Subject: [PATCH 25/29] update py containers (#35982) --- sdks/python/apache_beam/runners/dataflow/internal/names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 00d4624d202d..cf9bf6208dc5 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -34,6 +34,6 @@ # Unreleased sdks use container image tag specified below. # Update this tag whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. 
-BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20250811' +BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20250827' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3' From ac008074fa54981b47bc47888c864d09e26ea2f7 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 27 Aug 2025 16:20:40 -0400 Subject: [PATCH 26/29] [YAML]: add import jinja pipeline example (#35945) * add import jinja pipeline example * revert name change * update overall examples readme * fix lint issue * fix gemini small issue * Update sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md --------- Co-authored-by: tvalentyn --- .../apache_beam/yaml/examples/README.md | 4 ++ .../yaml/examples/testing/examples_test.py | 15 ++-- .../yaml/examples/testing/input_data.py | 36 ++++++---- .../transforms/jinja/import/README.md | 63 +++++++++++++++++ .../jinja/import/macros/wordCountMacros.yaml | 64 +++++++++++++++++ .../jinja/import/wordCountImport.yaml | 69 +++++++++++++++++++ 6 files changed, 230 insertions(+), 21 deletions(-) create mode 100644 sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md create mode 100644 sdks/python/apache_beam/yaml/examples/transforms/jinja/import/macros/wordCountMacros.yaml create mode 100644 sdks/python/apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml diff --git a/sdks/python/apache_beam/yaml/examples/README.md b/sdks/python/apache_beam/yaml/examples/README.md index 75098d212cd9..b053e3e6236d 100644 --- a/sdks/python/apache_beam/yaml/examples/README.md +++ b/sdks/python/apache_beam/yaml/examples/README.md @@ -245,6 +245,10 @@ by leveraging Jinja templating engine for dynamic pipeline generation based on inputs from the user through `% include`, `% import`, and inheritance directives. 
+Jinja `% import` directive: +- [wordCountImport.yaml](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml) +- [Instructions](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md) on how to run the pipeline. + Jinja `% include` directive: - [wordCountInclude.yaml](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/jinja/include/wordCountInclude.yaml) - [Instructions](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/jinja/include/README.md) on how to run the pipeline. diff --git a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py index 0bfcb3f61612..80e82945523c 100644 --- a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py +++ b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py @@ -353,7 +353,8 @@ def test_yaml_example(self): ] if jinja_preprocessor: jinja_preprocessor = jinja_preprocessor[0] - raw_spec_string = jinja_preprocessor(raw_spec_string) + raw_spec_string = jinja_preprocessor( + raw_spec_string, self._testMethodName) custom_preprocessors.remove(jinja_preprocessor) pipeline_spec = yaml.load( @@ -563,7 +564,7 @@ def _wordcount_minimal_test_preprocessor( @YamlExamplesTestSuite.register_test_preprocessor( - ['test_wordCountInclude_yaml']) + ['test_wordCountInclude_yaml', 'test_wordCountImport_yaml']) def _wordcount_jinja_test_preprocessor( test_spec: dict, expected: List[str], env: TestEnvironment): """ @@ -676,7 +677,8 @@ def _kafka_test_preprocessor( 'test_iceberg_migration_yaml', 'test_ml_preprocessing_yaml', 'test_anomaly_scoring_yaml', - 'test_wordCountInclude_yaml' + 'test_wordCountInclude_yaml', + 'test_wordCountImport_yaml' ]) def _io_write_test_preprocessor( test_spec: dict, expected: List[str], env: TestEnvironment): @@ -1253,8 
+1255,8 @@ def _batch_log_analysis_test_preprocessor( @YamlExamplesTestSuite.register_test_preprocessor( - ['test_wordCountInclude_yaml']) -def _jinja_preprocessor(raw_spec_string: str): + ['test_wordCountInclude_yaml', 'test_wordCountImport_yaml']) +def _jinja_preprocessor(raw_spec_string: str, test_name: str): """ Preprocessor for Jinja-based YAML tests. @@ -1274,12 +1276,11 @@ def _jinja_preprocessor(raw_spec_string: str): Returns: A string containing the fully rendered YAML pipeline specification. """ - jinja_variables = json.loads(input_data.word_count_jinja_parameter_data()) test_file_dir = os.path.dirname(__file__) sdk_root = os.path.abspath(os.path.join(test_file_dir, '../../../..')) - include_files = input_data.word_count_jinja_template_data() + include_files = input_data.word_count_jinja_template_data(test_name) mock_templates = {'main_template': raw_spec_string} for file_path in include_files: full_path = os.path.join(sdk_root, file_path) diff --git a/sdks/python/apache_beam/yaml/examples/testing/input_data.py b/sdks/python/apache_beam/yaml/examples/testing/input_data.py index 50d40224f828..fb468567355d 100644 --- a/sdks/python/apache_beam/yaml/examples/testing/input_data.py +++ b/sdks/python/apache_beam/yaml/examples/testing/input_data.py @@ -65,20 +65,28 @@ def word_count_jinja_parameter_data(): return json.dumps(params) -def word_count_jinja_template_data(): - return \ -[('apache_beam/yaml/examples/transforms/jinja/' - 'include/submodules/readFromTextTransform.yaml'), - ('apache_beam/yaml/examples/transforms/jinja/' - 'include/submodules/mapToFieldsSplitConfig.yaml'), - ('apache_beam/yaml/examples/transforms/jinja/' - 'include/submodules/explodeTransform.yaml'), - ('apache_beam/yaml/examples/transforms/jinja/' - 'include/submodules/combineTransform.yaml'), - ('apache_beam/yaml/examples/transforms/jinja/' - 'include/submodules/mapToFieldsCountConfig.yaml'), - ('apache_beam/yaml/examples/transforms/jinja/' - 
'include/submodules/writeToTextTransform.yaml')] +def word_count_jinja_template_data(test_name: str) -> list[str]: + if test_name == 'test_wordCountInclude_yaml': + return [ + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/readFromTextTransform.yaml', + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/mapToFieldsSplitConfig.yaml', + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/explodeTransform.yaml', + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/combineTransform.yaml', + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/mapToFieldsCountConfig.yaml', + 'apache_beam/yaml/examples/transforms/jinja/' + 'include/submodules/writeToTextTransform.yaml' + ] + elif test_name == 'test_wordCountImport_yaml': + return [ + 'apache_beam/yaml/examples/transforms/jinja/' + 'import/macros/wordCountMacros.yaml' + ] + return [] def iceberg_dynamic_destinations_users_data(): diff --git a/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md new file mode 100644 index 000000000000..14052cd3a6c4 --- /dev/null +++ b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/README.md @@ -0,0 +1,63 @@ + + +## Jinja % import Pipeline + +This example leverages the `% import` Jinja directive by having one main +pipeline and then one macros file containing all the transforms and configs +used. 
+ +General setup: +```sh +export PIPELINE_FILE=apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml +export KINGLEAR="gs://dataflow-samples/shakespeare/kinglear.txt" +export TEMP_LOCATION="gs://MY-BUCKET/wordCounts/" + +cd /beam/sdks/python +``` + +Multiline Run Example: +```sh +python -m apache_beam.yaml.main \ + --yaml_pipeline_file="${PIPELINE_FILE}" \ + --jinja_variables='{ + "readFromTextTransform": {"path": "'"${KINGLEAR}"'"}, + "mapToFieldsSplitConfig": { + "language": "python", + "fields": { + "value": "1" + } + }, + "explodeTransform": {"fields": "word"}, + "combineTransform": { + "group_by": "word", + "combine": {"value": "sum"} + }, + "mapToFieldsCountConfig": { + "language": "python", + "fields": {"output": "word + \" - \" + str(value)"} + }, + "writeToTextTransform": {"path": "'"${TEMP_LOCATION}"'"} + }' +``` + +Single Line Run Example: +```sh +python -m apache_beam.yaml.main --yaml_pipeline_file="${PIPELINE_FILE}" --jinja_variables='{"readFromTextTransform": {"path": "gs://dataflow-samples/shakespeare/kinglear.txt"}, "mapToFieldsSplitConfig": {"language": "python", "fields":{"value":"1"}}, "explodeTransform":{"fields":"word"}, "combineTransform":{"group_by":"word", "combine":{"value":"sum"}}, "mapToFieldsCountConfig":{"language": "python", "fields":{"output":"word + \" - \" + str(value)"}}, "writeToTextTransform":{"path":"'"${TEMP_LOCATION}"'"}}' +``` diff --git a/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/macros/wordCountMacros.yaml b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/macros/wordCountMacros.yaml new file mode 100644 index 000000000000..b3870693ef5f --- /dev/null +++ b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/macros/wordCountMacros.yaml @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +{%- macro readFromTextTransform(params) -%} + +- name: Read from GCS + type: ReadFromText + config: + path: "{{ params.path }}" +{%- endmacro -%} + +{%- macro mapToFieldsSplitConfig(params) -%} +language: "{{ params.language }}" +fields: + value: "{{ params.fields.value }}" + word: + callable: |- + import re + def my_mapping(row): + return re.findall(r'[A-Za-z\']+', row.line.lower()) +{%- endmacro -%} + +{%- macro explodeTransform(params) -%} +- name: Explode word arrays + type: Explode + config: + fields: "{{ params.fields }}" +{%- endmacro -%} + +{%- macro combineTransform(params) -%} +- name: Count words + type: Combine + config: + group_by: "{{ params.group_by }}" + combine: + value: "{{ params.combine.value }}" +{%- endmacro -%} + +{%- macro mapToFieldsCountConfig(params) -%} +language: "{{ params.language }}" +fields: + output: '{{ params.fields.output }}' +{%- endmacro -%} + +{%- macro writeToTextTransform(params) -%} +- name: Write to GCS + type: WriteToText + config: + path: "{{ params.path }}" +{%- endmacro -%} diff --git a/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml new file mode 100644 index 000000000000..1058a30b607a --- /dev/null +++ b/sdks/python/apache_beam/yaml/examples/transforms/jinja/import/wordCountImport.yaml @@ -0,0 +1,69 @@ +# coding=utf-8 +# +# 
Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This example reads from a public file stored on Google Cloud. This +# requires authenticating with Google Cloud, or setting the file in +# `ReadFromText` to a local file. +# +# To set up Application Default Credentials, +# see https://cloud.google.com/docs/authentication/external/set-up-adc. +# +# This pipeline reads in a text file, counts distinct words found in the text, +# then writes each word and its count to a text file. 
+ +{% import 'apache_beam/yaml/examples/transforms/jinja/import/macros/wordCountMacros.yaml' as macros %} + +pipeline: + type: chain + transforms: + +# Read in text file +{{ macros.readFromTextTransform(readFromTextTransform) | indent(4, true) }} + +# Split words and count occurrences + - name: Split words + type: MapToFields + config: +{{ macros.mapToFieldsSplitConfig(mapToFieldsSplitConfig) | indent(8, true) }} + +# Explode into individual words +{{ macros.explodeTransform(explodeTransform) | indent(4, true) }} + +# Group by word +{{ macros.combineTransform(combineTransform) | indent(4, true) }} + +# Format output to a single string consisting of `word - count` + - name: Format output + type: MapToFields + config: +{{ macros.mapToFieldsCountConfig(mapToFieldsCountConfig) | indent(8, true) }} + +# Write to text file on GCS, locally, etc +{{ macros.writeToTextTransform(writeToTextTransform) | indent(4, true) }} + +# Expected: +# Row(output='king - 311') +# Row(output='lear - 253') +# Row(output='dramatis - 1') +# Row(output='personae - 1') +# Row(output='of - 483') +# Row(output='britain - 2') +# Row(output='france - 32') +# Row(output='duke - 26') +# Row(output='burgundy - 20') +# Row(output='cornwall - 75') From bf16b251ad7ae875b768270f5d9a1702a028db99 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 25 Aug 2025 20:16:15 +0000 Subject: [PATCH 27/29] workflows: capture DinD tests in PreCommit Py Coverage workflow --- .../beam_PreCommit_Python_Coverage.yml | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml index b21ad50e9da2..95bc78c9dd5f 100644 --- a/.github/workflows/beam_PreCommit_Python_Coverage.yml +++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml @@ -58,35 +58,44 @@ env: jobs: beam_PreCommit_Python_Coverage: - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - runs-on: [self-hosted, 
ubuntu-20.04, highmem] + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: job_name: [beam_PreCommit_Python_Coverage] job_phrase: [Run Python_Coverage PreCommit] + python_version: ['3.9'] + # Run on both self-hosted and GitHub-hosted runners. + # Some tests (marked require_docker_in_docker) can't run on Beam's + # self-hosted runners due to Docker-in-Docker environment constraint. + # These tests will only execute on ubuntu-latest (GitHub-hosted). + # Context: https://github.com/apache/beam/pull/35585 + os: [[self-hosted, ubuntu-20.04, highmem], [ubuntu-latest]] timeout-minutes: 180 if: | github.event_name == 'push' || github.event_name == 'pull_request_target' || (github.event_name == 'schedule' && github.repository == 'apache/beam') || github.event_name == 'workflow_dispatch' || - github.event.comment.body == 'Run Python_Coverage PreCommit' + startswith(github.event.comment.body, 'Run Python_Coverage PreCommit 3.') steps: - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - name: Setup environment uses: ./.github/actions/setup-environment-action with: java-version: default - python-version: default + python-version: ${{ matrix.python_version }} - name: Start DinD uses: ./.github/actions/dind-up-action id: dind + if: contains(matrix.os, 'self-hosted') with: # Enable all the new features cleanup-dind-on-start: "true" @@ -97,9 +106,9 @@ jobs: export-gh-env: "true" - name: Run preCommitPyCoverage env: - DOCKER_HOST: ${{ steps.dind.outputs.docker-host }} + DOCKER_HOST: ${{ contains(matrix.os, 'self-hosted') && 
steps.dind.outputs.docker-host || '' }} TOX_TESTENV_PASSENV: "DOCKER_*,TESTCONTAINERS_*,TC_*,BEAM_*,GRPC_*,OMP_*,OPENBLAS_*,PYTHONHASHSEED,PYTEST_*" - TESTCONTAINERS_HOST_OVERRIDE: ${{ env.DIND_IP }} + TESTCONTAINERS_HOST_OVERRIDE: ${{ contains(matrix.os, 'self-hosted') && env.DIND_IP || '' }} TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE: "/var/run/docker.sock" TESTCONTAINERS_RYUK_DISABLED: "false" TESTCONTAINERS_RYUK_CONTAINER_PRIVILEGED: "true" @@ -110,6 +119,12 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :sdks:python:test-suites:tox:py39:preCommitPyCoverage + arguments: | + -Pposargs="${{ + contains(matrix.os, 'self-hosted') && + '-m (not require_docker_in_docker)' || + '-m require_docker_in_docker' + }}" - uses: codecov/codecov-action@v3 with: flags: python From 984c0a01c0421216ab46e9147b448af446dfc99f Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Mon, 25 Aug 2025 22:37:19 +0000 Subject: [PATCH 28/29] workflows: temporarily removing `ubuntu-latest` till resolving deps --- .github/workflows/beam_PreCommit_Python_Coverage.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml index 95bc78c9dd5f..018b516b9cfa 100644 --- a/.github/workflows/beam_PreCommit_Python_Coverage.yml +++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml @@ -71,7 +71,8 @@ jobs: # self-hosted runners due to Docker-in-Docker environment constraint. # These tests will only execute on ubuntu-latest (GitHub-hosted). # Context: https://github.com/apache/beam/pull/35585 - os: [[self-hosted, ubuntu-20.04, highmem], [ubuntu-latest]] + # Temporarily removed the ubuntu-latest env until the dependency issues are resolved. 
+ os: [[self-hosted, ubuntu-20.04, highmem]] timeout-minutes: 180 if: | github.event_name == 'push' || From 4c8c06efa9cfdf5ba2cd96935af67c82ce0627c1 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Tue, 26 Aug 2025 17:15:26 +0000 Subject: [PATCH 29/29] workflows: add `matrix.os` label to `beam_PreCommit_Python_Coverage` --- .github/workflows/beam_PreCommit_Python_Coverage.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml index 018b516b9cfa..3da51a2eceda 100644 --- a/.github/workflows/beam_PreCommit_Python_Coverage.yml +++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml @@ -58,7 +58,7 @@ env: jobs: beam_PreCommit_Python_Coverage: - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) (${{ join(matrix.os, ', ') }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -87,7 +87,7 @@ jobs: with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) (${{ join(matrix.os, ', ') }}) - name: Setup environment uses: ./.github/actions/setup-environment-action with: @@ -134,7 +134,7 @@ jobs: uses: actions/upload-artifact@v4 if: failure() with: - name: Python Test Results + name: Python ${{ matrix.python_version }} Test Results (${{ join(matrix.os, ', ') }}) path: '**/pytest*.xml' - name: Publish Python Test Results env: @@ -145,4 +145,5 @@ jobs: commit: '${{ env.prsha || env.GITHUB_SHA }}' comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/pytest*.xml' - large_files: true \ No newline at end of file + large_files: true + check_name: 
"Python ${{ matrix.python_version }} Test Results (${{ join(matrix.os, ', ') }})"