From 771bfe54474e2cc54dbb87609ea39dfedf010ee9 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 6 Jun 2025 18:12:25 +0000 Subject: [PATCH 01/35] fix postcommit oracle test --- sdks/python/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index bfcbd4b6b8af..d8072f417013 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -436,7 +436,8 @@ def get_portability_package_data(): 'virtualenv-clone>=0.5,<1.0', 'mysql-connector-python>=9.3.0', 'python-tds>=1.16.1', - 'sqlalchemy-pytds>=1.0.2' + 'sqlalchemy-pytds>=1.0.2', + 'oracledb>=3.1.1' ], 'gcp': [ 'cachetools>=3.1.0,<6', From 91abfe50086494c6a067e38915701f56047ecb1b Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 6 Jun 2025 18:34:52 +0000 Subject: [PATCH 02/35] add revision --- .github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json index a975cd1cd104..541dc4ea8e87 100644 --- a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json +++ b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 1 + "revision": 2 } From f22539ba94c55ed896a362e930aea26b0717338d Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Fri, 6 Jun 2025 19:46:08 +0000 Subject: [PATCH 03/35] switch to hosted runner to try with kafka test --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 8ec3c2bc7aaf..97b56a31ef69 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -54,7 +54,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: [self-hosted, ubuntu-20.04, main] + runs-on: ubuntu-latest timeout-minutes: 100 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: From 53fabed8256ba8e77b3e1697630a45994d734e42 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 9 Jun 2025 14:58:32 +0000 Subject: [PATCH 04/35] add extended timeout --- sdks/python/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index accacb1953bb..771b413aa8b0 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -157,7 +157,7 @@ tasks.register("postCommitYamlIntegrationTests") { doLast { exec { executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='extended_tests'" + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='extended_tests' --timeout=1200" } } } From 01865d515a12eb81a8713ba91174714d8492b624 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 9 Jun 2025 17:33:53 +0000 Subject: [PATCH 05/35] upgrade to 4.10 testcontainers --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index d8072f417013..0744f6d6fd8f 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -430,7 +430,7 @@ def get_portability_package_data(): 'sqlalchemy>=1.3,<3.0', 'psycopg2-binary>=2.8.5,<2.9.10; python_version <= "3.9"', 'psycopg2-binary>=2.8.5,<3.0; python_version >= "3.10"', - 'testcontainers[mysql,kafka]>=3.0.3,<4.0.0', + 'testcontainers[mysql,kafka]>=4.10.0', 'cryptography>=41.0.2', 'hypothesis>5.0.0,<7.0.0', 'virtualenv-clone>=0.5,<1.0', From 9c969d9c196ecd7c35727e49965005445233697e Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 9 Jun 2025 17:34:26 +0000 Subject: [PATCH 06/35] switch out to redpanda for kafka --- sdks/python/apache_beam/yaml/integration_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index f6c60ae5f121..d8f520cc8a24 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -39,7 +39,7 @@ from testcontainers.core.container import DockerContainer from testcontainers.core.waiting_utils import wait_for_logs from testcontainers.google import PubSubContainer -from testcontainers.kafka import KafkaContainer +from testcontainers.kafka import RedpandaContainer from testcontainers.mssql import SqlServerContainer from testcontainers.mysql import MySqlContainer from testcontainers.postgres import PostgresContainer @@ -441,7 +441,7 @@ def temp_kafka_server(): interacting with the temporary Kafka server. """ try: - with KafkaContainer() as kafka_container: + with RedpandaContainer() as kafka_container: yield kafka_container.get_bootstrap_server() except Exception as err: logging.error("Error interacting with temporary Kakfa Server: %s", err) From 2c9c3fb3c624601b419b1b098e6a266ca2014b99 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Mon, 9 Jun 2025 20:52:41 +0000 Subject: [PATCH 07/35] remove redpandacontainer --- .../yaml/extended_tests/kafka.yaml | 58 +++++++++---------- .../apache_beam/yaml/integration_tests.py | 12 ++-- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml b/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml index c2e3c8657ee2..af7a5dd48b9a 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml @@ -46,35 +46,35 @@ pipelines: producer_config_updates: linger.ms: "0" - # Kafka read pipeline - # Need a separate read pipeline to make sure the write pipeline is flushed - - pipeline: - type: chain - transforms: - - type: ReadFromKafka - config: - format: "RAW" - topic: "silly_topic" - bootstrap_servers: "{TEMP_BOOTSTAP_SERVER}" - consumer_config: - auto.offset.reset: "earliest" - group.id: "yaml-kafka-test-group" - max_read_time_seconds: 10 # will read forever if not set - - type: MapToFields - config: - language: python - fields: - value: - callable: | - # Kafka RAW format reads messages as bytes in the 'payload' field of a Row - lambda row: row.payload.decode('utf-8') - output_type: string - - type: AssertEqual - config: - elements: - - {value: "123"} - - {value: "456"} - - {value: "789"} + # # Kafka read pipeline + # # Need a separate read pipeline to make sure the write pipeline is flushed + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromKafka + # config: + # format: "RAW" + # topic: "silly_topic" + # bootstrap_servers: "{TEMP_BOOTSTAP_SERVER}" + # consumer_config: + # auto.offset.reset: "earliest" + # group.id: "yaml-kafka-test-group" + # max_read_time_seconds: 10 # will read forever if not set + # - type: MapToFields + # config: + # language: python + # fields: + # value: + # callable: | + # # Kafka RAW format reads messages as bytes in the 'payload' field of a Row + # lambda row: row.payload.decode('utf-8') + # output_type: string + # - type: AssertEqual + # config: + # elements: + # - {value: "123"} + # - {value: "456"} + # - {value: "789"} # TODO: Error handling hard to trigger upon initial investigations. Need to # investigate more. diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index d8f520cc8a24..9b47d15789b8 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -39,7 +39,7 @@ from testcontainers.core.container import DockerContainer from testcontainers.core.waiting_utils import wait_for_logs from testcontainers.google import PubSubContainer -from testcontainers.kafka import RedpandaContainer +from testcontainers.kafka import KafkaContainer from testcontainers.mssql import SqlServerContainer from testcontainers.mysql import MySqlContainer from testcontainers.postgres import PostgresContainer @@ -440,12 +440,12 @@ def temp_kafka_server(): Exception: If there's an error starting the Kafka container or interacting with the temporary Kafka server. """ - try: - with RedpandaContainer() as kafka_container: + with KafkaContainer().with_kraft() as kafka_container: + try: yield kafka_container.get_bootstrap_server() - except Exception as err: - logging.error("Error interacting with temporary Kakfa Server: %s", err) - raise err + except Exception as err: + logging.error("Error interacting with temporary Kakfa Server: %s", err) + raise err @contextlib.contextmanager From 9e2033d4ef42393a66e80ee6b056d5a10213f4cb Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 00:15:58 +0000 Subject: [PATCH 08/35] tmp comment --- .../yaml/extended_tests/bigquery.yaml | 94 +++++++------- .../yaml/extended_tests/enrichment.yaml | 102 +++++++-------- .../apache_beam/yaml/extended_tests/jdbc.yaml | 46 +++---- .../yaml/extended_tests/kafka.yaml | 58 ++++----- .../yaml/extended_tests/mysql.yaml | 44 +++---- .../yaml/extended_tests/oracle.yaml | 66 +++++----- .../yaml/extended_tests/pubsub.yaml | 22 ++-- .../yaml/extended_tests/spanner.yaml | 122 +++++++++--------- .../yaml/extended_tests/sqlserver.yaml | 44 +++---- .../yaml/extended_tests/tfrecord.yaml | 78 +++++------ 10 files changed, 338 insertions(+), 338 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml b/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml index f5ab31b3855b..6e15fea25c93 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml @@ -15,16 +15,16 @@ # limitations under the License. # -fixtures: - - name: BQ_TABLE - type: "apache_beam.yaml.integration_tests.temp_bigquery_table" - config: - project: "apache-beam-testing" - - name: TEMP_DIR - # Need distributed filesystem to be able to read and write from a container. - type: "apache_beam.yaml.integration_tests.gcs_temp_dir" - config: - bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +# fixtures: +# - name: BQ_TABLE +# type: "apache_beam.yaml.integration_tests.temp_bigquery_table" +# config: +# project: "apache-beam-testing" +# - name: TEMP_DIR +# # Need distributed filesystem to be able to read and write from a container. +# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" +# config: +# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -36,42 +36,42 @@ pipelines: - {label: "11a", rank: 0} - {label: "37a", rank: 1} - {label: "389a", rank: 2} - - type: WriteToBigQuery - config: - table: "{BQ_TABLE}" - options: - project: "apache-beam-testing" - temp_location: "{TEMP_DIR}" + # - type: WriteToBigQuery + # config: + # table: "{BQ_TABLE}" + # options: + # project: "apache-beam-testing" + # temp_location: "{TEMP_DIR}" - - pipeline: - type: chain - transforms: - - type: ReadFromBigQuery - config: - table: "{BQ_TABLE}" - - type: AssertEqual - config: - elements: - - {label: "11a", rank: 0} - - {label: "37a", rank: 1} - - {label: "389a", rank: 2} - options: - project: "apache-beam-testing" - temp_location: "{TEMP_DIR}" + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromBigQuery + # config: + # table: "{BQ_TABLE}" + # - type: AssertEqual + # config: + # elements: + # - {label: "11a", rank: 0} + # - {label: "37a", rank: 1} + # - {label: "389a", rank: 2} + # options: + # project: "apache-beam-testing" + # temp_location: "{TEMP_DIR}" - - pipeline: - type: chain - transforms: - - type: ReadFromBigQuery - config: - table: "{BQ_TABLE}" - fields: ["label"] - row_restriction: "rank > 0" - - type: AssertEqual - config: - elements: - - {label: "37a"} - - {label: "389a"} - options: - project: "apache-beam-testing" - temp_location: "{TEMP_DIR}" + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromBigQuery + # config: + # table: "{BQ_TABLE}" + # fields: ["label"] + # row_restriction: "rank > 0" + # - type: AssertEqual + # config: + # elements: + # - {label: "37a"} + # - {label: "389a"} + # options: + # project: "apache-beam-testing" + # temp_location: "{TEMP_DIR}" diff --git a/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml b/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml index 6469c094b8b4..ff5ec00bf982 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml @@ -15,15 +15,15 @@ # limitations under the License. # -fixtures: - - name: BQ_TABLE - type: "apache_beam.yaml.integration_tests.temp_bigquery_table" - config: - project: "apache-beam-testing" - - name: TEMP_DIR - type: "apache_beam.yaml.integration_tests.gcs_temp_dir" - config: - bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +# fixtures: +# - name: BQ_TABLE +# type: "apache_beam.yaml.integration_tests.temp_bigquery_table" +# config: +# project: "apache-beam-testing" +# - name: TEMP_DIR +# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" +# config: +# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -37,48 +37,48 @@ pipelines: - {label: '37a', rank: 1} - {label: '389a', rank: 2} - - type: WriteToBigQuery - config: - table: "{BQ_TABLE}" + # - type: WriteToBigQuery + # config: + # table: "{BQ_TABLE}" - - pipeline: - type: chain - transforms: - - type: Create - name: Data - config: - elements: - - {label: '11a', name: 'S1'} - - {label: '37a', name: 'S2'} - - {label: '389a', name: 'S3'} - - type: Enrichment - name: Enriched - config: - enrichment_handler: 'BigQuery' - handler_config: - project: apache-beam-testing - table_name: "{BQ_TABLE}" - fields: ['label'] - row_restriction_template: "label = '37a'" - timeout: 30 + # - pipeline: + # type: chain + # transforms: + # - type: Create + # name: Data + # config: + # elements: + # - {label: '11a', name: 'S1'} + # - {label: '37a', name: 'S2'} + # - {label: '389a', name: 'S3'} + # - type: Enrichment + # name: Enriched + # config: + # enrichment_handler: 'BigQuery' + # handler_config: + # project: apache-beam-testing + # table_name: "{BQ_TABLE}" + # fields: ['label'] + # row_restriction_template: "label = '37a'" + # timeout: 30 - - type: MapToFields - config: - language: python - fields: - label: - callable: 'lambda x: x.label' - output_type: string - rank: - callable: 'lambda x: x.rank' - output_type: integer - name: - callable: 'lambda x: x.name' - output_type: string + # - type: MapToFields + # config: + # language: python + # fields: + # label: + # callable: 'lambda x: x.label' + # output_type: string + # rank: + # callable: 'lambda x: x.rank' + # output_type: integer + # name: + # callable: 'lambda x: x.name' + # output_type: string - - type: AssertEqual - config: - elements: - - {label: '37a', rank: 1, name: 'S2'} - options: - yaml_experimental_features: [ 'Enrichment' ] \ No newline at end of file + # - type: AssertEqual + # config: + # elements: + # - {label: '37a', rank: 1, name: 'S2'} + # options: + # yaml_experimental_features: [ 'Enrichment' ] \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml b/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml index fffdf96f463d..3b86d911df72 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml @@ -16,9 +16,9 @@ # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_sqlite_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_sqlite_database" pipelines: # Jdbc write pipeline @@ -31,25 +31,25 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToJdbc - config: - url: "{TEMP_DB}" - driver_class_name: "org.sqlite.JDBC" - query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + # - type: WriteToJdbc + # config: + # url: "{TEMP_DB}" + # driver_class_name: "org.sqlite.JDBC" + # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # Jdbc read pipeline - - pipeline: - type: chain - transforms: - - type: ReadFromJdbc - config: - url: "{TEMP_DB}" - driver_class_name: "org.sqlite.JDBC" - query: "SELECT * FROM tmp_table" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # Jdbc read pipeline + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromJdbc + # config: + # url: "{TEMP_DB}" + # driver_class_name: "org.sqlite.JDBC" + # query: "SELECT * FROM tmp_table" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} diff --git a/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml b/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml index af7a5dd48b9a..c2e3c8657ee2 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml @@ -46,35 +46,35 @@ pipelines: producer_config_updates: linger.ms: "0" - # # Kafka read pipeline - # # Need a separate read pipeline to make sure the write pipeline is flushed - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromKafka - # config: - # format: "RAW" - # topic: "silly_topic" - # bootstrap_servers: "{TEMP_BOOTSTAP_SERVER}" - # consumer_config: - # auto.offset.reset: "earliest" - # group.id: "yaml-kafka-test-group" - # max_read_time_seconds: 10 # will read forever if not set - # - type: MapToFields - # config: - # language: python - # fields: - # value: - # callable: | - # # Kafka RAW format reads messages as bytes in the 'payload' field of a Row - # lambda row: row.payload.decode('utf-8') - # output_type: string - # - type: AssertEqual - # config: - # elements: - # - {value: "123"} - # - {value: "456"} - # - {value: "789"} + # Kafka read pipeline + # Need a separate read pipeline to make sure the write pipeline is flushed + - pipeline: + type: chain + transforms: + - type: ReadFromKafka + config: + format: "RAW" + topic: "silly_topic" + bootstrap_servers: "{TEMP_BOOTSTAP_SERVER}" + consumer_config: + auto.offset.reset: "earliest" + group.id: "yaml-kafka-test-group" + max_read_time_seconds: 10 # will read forever if not set + - type: MapToFields + config: + language: python + fields: + value: + callable: | + # Kafka RAW format reads messages as bytes in the 'payload' field of a Row + lambda row: row.payload.decode('utf-8') + output_type: string + - type: AssertEqual + config: + elements: + - {value: "123"} + - {value: "456"} + - {value: "789"} # TODO: Error handling hard to trigger upon initial investigations. Need to # investigate more. diff --git a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml index 19c6774b2252..570902b633bb 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_mysql_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_mysql_database" pipelines: # MySql write pipeline @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToMySql - config: - url: "{TEMP_DB}" - query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" + # - type: WriteToMySql + # config: + # url: "{TEMP_DB}" + # query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" - # MySql read pipeline - - pipeline: - type: chain - transforms: - - type: ReadFromMySql - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - driver_class_name: "com.mysql.cj.jdbc.Driver" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # MySql read pipeline + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromMySql + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # driver_class_name: "com.mysql.cj.jdbc.Driver" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml b/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml index 01d23989d166..4f11b9d0c3b7 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_oracle_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_oracle_database" pipelines: # Oracle write pipeline @@ -30,34 +30,34 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToOracle - config: - url: "{TEMP_DB}" - query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + # - type: WriteToOracle + # config: + # url: "{TEMP_DB}" + # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # Oracle read pipeline - # Need a separate read pipeline to make sure the write pipeline is flushed - - pipeline: - type: chain - transforms: - - type: ReadFromOracle - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - driver_class_name: "oracle.jdbc.OracleDriver" - - type: MapToFields - config: - language: python - fields: - value: - callable: "lambda x: int(x.VALUE)" - output_type: integer - rank: - callable: "lambda x: int(x.RANK)" - output_type: integer - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # Oracle read pipeline + # # Need a separate read pipeline to make sure the write pipeline is flushed + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromOracle + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # driver_class_name: "oracle.jdbc.OracleDriver" + # - type: MapToFields + # config: + # language: python + # fields: + # value: + # callable: "lambda x: int(x.VALUE)" + # output_type: integer + # rank: + # callable: "lambda x: int(x.RANK)" + # output_type: integer + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} diff --git a/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml b/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml index 41f739ac77e0..af8cb51207fc 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml @@ -15,11 +15,11 @@ # limitations under the License. # -fixtures: - - name: PS_TOPIC - type: "apache_beam.yaml.integration_tests.temp_pubsub_emulator" - config: - project_id: "apache-beam-testing" +# fixtures: +# - name: PS_TOPIC +# type: "apache_beam.yaml.integration_tests.temp_pubsub_emulator" +# config: +# project_id: "apache-beam-testing" pipelines: # Pubsub write pipeline @@ -32,13 +32,13 @@ pipelines: - {value: "11a"} - {value: "37a"} - {value: "389a"} - - type: WriteToPubSub - config: - topic: "{PS_TOPIC}" - format: "RAW" + # - type: WriteToPubSub + # config: + # topic: "{PS_TOPIC}" + # format: "RAW" - options: - streaming: true + # options: + # streaming: true # TODO: Current PubSubIO doesn't have a max_read_time_seconds parameter like diff --git a/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml b/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml index d4345441825a..bb4cd4b3abf8 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml @@ -15,15 +15,15 @@ # limitations under the License. # -fixtures: - - name: SPANNER_TABLE - type: "apache_beam.yaml.integration_tests.temp_spanner_table" - config: - project: "apache-beam-testing" - - name: TEMP_DIR - type: "apache_beam.yaml.integration_tests.gcs_temp_dir" - config: - bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +# fixtures: +# - name: SPANNER_TABLE +# type: "apache_beam.yaml.integration_tests.temp_spanner_table" +# config: +# project: "apache-beam-testing" +# - name: TEMP_DIR +# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" +# config: +# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -35,61 +35,61 @@ pipelines: - UserId: "01" Key: "Apple" - - type: WriteToSpanner - name: Write - input: Row - config: - project: "{SPANNER_TABLE[0]}" - instance: "{SPANNER_TABLE[1]}" - database: "{SPANNER_TABLE[2]}" - table: "{SPANNER_TABLE[3]}" - error_handling: - output: my_error_output + # - type: WriteToSpanner + # name: Write + # input: Row + # config: + # project: "{SPANNER_TABLE[0]}" + # instance: "{SPANNER_TABLE[1]}" + # database: "{SPANNER_TABLE[2]}" + # table: "{SPANNER_TABLE[3]}" + # error_handling: + # output: my_error_output - - type: LogForTesting - input: Write.my_error_output + # - type: LogForTesting + # input: Write.my_error_output - - pipeline: - type: chain - transforms: - - type: ReadFromSpanner - config: - project: "{SPANNER_TABLE[0]}" - instance: "{SPANNER_TABLE[1]}" - database: "{SPANNER_TABLE[2]}" - query: 'SELECT * FROM tmp_table where UserId = "01"' + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromSpanner + # config: + # project: "{SPANNER_TABLE[0]}" + # instance: "{SPANNER_TABLE[1]}" + # database: "{SPANNER_TABLE[2]}" + # query: 'SELECT * FROM tmp_table where UserId = "01"' - - type: AssertEqual - config: - elements: - - UserId: "01" - Key: "Apple" + # - type: AssertEqual + # config: + # elements: + # - UserId: "01" + # Key: "Apple" - - pipeline: - type: chain - transforms: - - type: Create - config: - elements: - - UserId: "02" - Key: "Mango" + # - pipeline: + # type: chain + # transforms: + # - type: Create + # config: + # elements: + # - UserId: "02" + # Key: "Mango" - - type: WriteToSpanner - config: - project: "{SPANNER_TABLE[0]}" - instance: "{SPANNER_TABLE[1]}" - database: "{SPANNER_TABLE[2]}" - table: "{SPANNER_TABLE[3]}" + # - type: WriteToSpanner + # config: + # project: "{SPANNER_TABLE[0]}" + # instance: "{SPANNER_TABLE[1]}" + # database: "{SPANNER_TABLE[2]}" + # table: "{SPANNER_TABLE[3]}" - - pipeline: - type: chain - transforms: - - type: ReadFromSpanner - config: - project: "{SPANNER_TABLE[0]}" - instance: "{SPANNER_TABLE[1]}" - database: "{SPANNER_TABLE[2]}" - table: "{SPANNER_TABLE[3]}" - columns: - - "{SPANNER_TABLE[4][0]}" - - "{SPANNER_TABLE[4][1]}" + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromSpanner + # config: + # project: "{SPANNER_TABLE[0]}" + # instance: "{SPANNER_TABLE[1]}" + # database: "{SPANNER_TABLE[2]}" + # table: "{SPANNER_TABLE[3]}" + # columns: + # - "{SPANNER_TABLE[4][0]}" + # - "{SPANNER_TABLE[4][1]}" diff --git a/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml b/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml index 9e5ba2ab2ec2..501ca3ffd1a2 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_sqlserver_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_sqlserver_database" pipelines: # SqlServer write pipeline @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToSqlServer - config: - url: "{TEMP_DB}" - query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + # - type: WriteToSqlServer + # config: + # url: "{TEMP_DB}" + # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # SqlServer read pipeline - - pipeline: - type: chain - transforms: - - type: ReadFromSqlServer - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - driver_class_name: "com.microsoft.sqlserver.jdbc.SQLServerDriver" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # SqlServer read pipeline + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromSqlServer + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # driver_class_name: "com.microsoft.sqlserver.jdbc.SQLServerDriver" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml b/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml index e6316e4f4a56..3de5c8d1a737 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DIR - type: "tempfile.TemporaryDirectory" +# fixtures: +# - name: TEMP_DIR +# type: "tempfile.TemporaryDirectory" pipelines: - pipeline: @@ -27,40 +27,40 @@ pipelines: config: elements: - {name: "foo"} - - type: MapToFields - config: - language: python - fields: - record: - callable: | - def process(row): - return row.name.encode('utf-8') - output_type: bytes - - type: WriteToTFRecord - config: - file_path_prefix: "{TEMP_DIR}" - file_name_suffix: ".tfrecords" - compression_type: "UNCOMPRESSED" - num_shards: 1 + # - type: MapToFields + # config: + # language: python + # fields: + # record: + # callable: | + # def process(row): + # return row.name.encode('utf-8') + # output_type: bytes + # - type: WriteToTFRecord + # config: + # file_path_prefix: "{TEMP_DIR}" + # file_name_suffix: ".tfrecords" + # compression_type: "UNCOMPRESSED" + # num_shards: 1 - - pipeline: - type: chain - transforms: - - type: ReadFromTFRecord - config: - file_pattern: "{TEMP_DIR}*" - compression_type: "AUTO" - validate: true - - type: MapToFields - config: - language: python - fields: - record: - callable: | - def process(row): - return row.record.decode('utf-8') - output_type: string - - type: AssertEqual - config: - elements: - - {record: "foo"} \ No newline at end of file + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromTFRecord + # config: + # file_pattern: "{TEMP_DIR}*" + # compression_type: "AUTO" + # validate: true + # - type: MapToFields + # config: + # language: python + # fields: + # record: + # callable: | + # def process(row): + # return row.record.decode('utf-8') + # output_type: string + # - type: AssertEqual + # config: + # elements: + # - {record: "foo"} \ No newline at end of file From f69cc5271e5ae38490557708068bdeea52892513 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 01:36:48 +0000 Subject: [PATCH 09/35] add postgres comment --- .../yaml/extended_tests/postgres.yaml | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml b/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml index 8957c782753e..183685a13212 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_postgres_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_postgres_database" pipelines: # Postgres write pipeline @@ -30,40 +30,40 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToPostgres - config: - url: "{TEMP_DB}" - query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + # - type: WriteToPostgres + # config: + # url: "{TEMP_DB}" + # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # Postgres read pipeline with driver_class_name - - pipeline: - type: chain - transforms: - - type: ReadFromPostgres - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - driver_class_name: "org.postgresql.Driver" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # Postgres read pipeline with driver_class_name + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromPostgres + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # driver_class_name: "org.postgresql.Driver" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} - # Postgres read pipeline without driver_class_name - # This is to prevent https://github.com/apache/beam/issues/35122 from - # happening again. - - pipeline: - type: chain - transforms: - - type: ReadFromPostgres - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} \ No newline at end of file + # # Postgres read pipeline without driver_class_name + # # This is to prevent https://github.com/apache/beam/issues/35122 from + # # happening again. + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromPostgres + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} \ No newline at end of file From 1273a7b9b4145ab84e15b087786468b4242aaba2 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 15:07:17 +0000 Subject: [PATCH 10/35] revert to old kafkaContainer --- sdks/python/apache_beam/yaml/integration_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index 9b47d15789b8..b076656e3200 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -440,7 +440,7 @@ def temp_kafka_server(): Exception: If there's an error starting the Kafka container or interacting with the temporary Kafka server. """ - with KafkaContainer().with_kraft() as kafka_container: + with KafkaContainer() as kafka_container: try: yield kafka_container.get_bootstrap_server() except Exception as err: From e0cffaf5d6835a6fa1f21ac382f11235f8aa12ca Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 17:40:53 +0000 Subject: [PATCH 11/35] revert commented out code and revert testcontainer version change --- .../yaml/extended_tests/bigquery.yaml | 94 +++++++------- .../yaml/extended_tests/enrichment.yaml | 102 +++++++-------- .../apache_beam/yaml/extended_tests/jdbc.yaml | 46 +++---- .../yaml/extended_tests/oracle.yaml | 66 +++++----- .../yaml/extended_tests/postgres.yaml | 76 +++++------ .../yaml/extended_tests/pubsub.yaml | 22 ++-- .../yaml/extended_tests/spanner.yaml | 122 +++++++++--------- .../yaml/extended_tests/sqlserver.yaml | 44 +++---- .../yaml/extended_tests/tfrecord.yaml | 78 +++++------ sdks/python/setup.py | 2 +- 10 files changed, 326 insertions(+), 326 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml b/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml index 6e15fea25c93..f5ab31b3855b 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml @@ -15,16 +15,16 @@ # limitations under the License. # -# fixtures: -# - name: BQ_TABLE -# type: "apache_beam.yaml.integration_tests.temp_bigquery_table" -# config: -# project: "apache-beam-testing" -# - name: TEMP_DIR -# # Need distributed filesystem to be able to read and write from a container. -# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" -# config: -# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +fixtures: + - name: BQ_TABLE + type: "apache_beam.yaml.integration_tests.temp_bigquery_table" + config: + project: "apache-beam-testing" + - name: TEMP_DIR + # Need distributed filesystem to be able to read and write from a container. + type: "apache_beam.yaml.integration_tests.gcs_temp_dir" + config: + bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -36,42 +36,42 @@ pipelines: - {label: "11a", rank: 0} - {label: "37a", rank: 1} - {label: "389a", rank: 2} - # - type: WriteToBigQuery - # config: - # table: "{BQ_TABLE}" - # options: - # project: "apache-beam-testing" - # temp_location: "{TEMP_DIR}" + - type: WriteToBigQuery + config: + table: "{BQ_TABLE}" + options: + project: "apache-beam-testing" + temp_location: "{TEMP_DIR}" - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromBigQuery - # config: - # table: "{BQ_TABLE}" - # - type: AssertEqual - # config: - # elements: - # - {label: "11a", rank: 0} - # - {label: "37a", rank: 1} - # - {label: "389a", rank: 2} - # options: - # project: "apache-beam-testing" - # temp_location: "{TEMP_DIR}" + - pipeline: + type: chain + transforms: + - type: ReadFromBigQuery + config: + table: "{BQ_TABLE}" + - type: AssertEqual + config: + elements: + - {label: "11a", rank: 0} + - {label: "37a", rank: 1} + - {label: "389a", rank: 2} + options: + project: "apache-beam-testing" + temp_location: "{TEMP_DIR}" - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromBigQuery - # config: - # table: "{BQ_TABLE}" - # fields: ["label"] - # row_restriction: "rank > 0" - # - type: AssertEqual - # config: - # elements: - # - {label: "37a"} - # - {label: "389a"} - # options: - # project: "apache-beam-testing" - # temp_location: "{TEMP_DIR}" + - pipeline: + type: chain + transforms: + - type: ReadFromBigQuery + config: + table: "{BQ_TABLE}" + fields: ["label"] + row_restriction: "rank > 0" + - type: AssertEqual + config: + elements: + - {label: "37a"} + - {label: "389a"} + options: + project: "apache-beam-testing" + temp_location: "{TEMP_DIR}" diff --git a/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml b/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml index ff5ec00bf982..6469c094b8b4 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml @@ -15,15 +15,15 @@ # limitations under the License. # -# fixtures: -# - name: BQ_TABLE -# type: "apache_beam.yaml.integration_tests.temp_bigquery_table" -# config: -# project: "apache-beam-testing" -# - name: TEMP_DIR -# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" -# config: -# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +fixtures: + - name: BQ_TABLE + type: "apache_beam.yaml.integration_tests.temp_bigquery_table" + config: + project: "apache-beam-testing" + - name: TEMP_DIR + type: "apache_beam.yaml.integration_tests.gcs_temp_dir" + config: + bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -37,48 +37,48 @@ pipelines: - {label: '37a', rank: 1} - {label: '389a', rank: 2} - # - type: WriteToBigQuery - # config: - # table: "{BQ_TABLE}" + - type: WriteToBigQuery + config: + table: "{BQ_TABLE}" - # - pipeline: - # type: chain - # transforms: - # - type: Create - # name: Data - # config: - # elements: - # - {label: '11a', name: 'S1'} - # - {label: '37a', name: 'S2'} - # - {label: '389a', name: 'S3'} - # - type: Enrichment - # name: Enriched - # config: - # enrichment_handler: 'BigQuery' - # handler_config: - # project: apache-beam-testing - # table_name: "{BQ_TABLE}" - # fields: ['label'] - # row_restriction_template: "label = '37a'" - # timeout: 30 + - pipeline: + type: chain + transforms: + - type: Create + name: Data + config: + elements: + - {label: '11a', name: 'S1'} + - {label: '37a', name: 'S2'} + - {label: '389a', name: 'S3'} + - type: Enrichment + name: Enriched + config: + enrichment_handler: 'BigQuery' + handler_config: + project: apache-beam-testing + table_name: "{BQ_TABLE}" + fields: ['label'] + row_restriction_template: "label = '37a'" + timeout: 30 - # - type: MapToFields - # config: - # language: python - # fields: - # label: - # callable: 'lambda x: x.label' - # output_type: string - # rank: - # callable: 'lambda x: x.rank' - # output_type: integer - # name: - # callable: 'lambda x: x.name' - # output_type: string + - type: MapToFields + config: + language: python + fields: + label: + callable: 'lambda x: x.label' + output_type: string + rank: + callable: 'lambda x: x.rank' + output_type: integer + name: + callable: 'lambda x: x.name' + output_type: string - # - type: AssertEqual - # config: - # elements: - # - {label: '37a', rank: 1, name: 'S2'} - # options: - # yaml_experimental_features: [ 'Enrichment' ] \ No newline at end of file + - type: AssertEqual + config: + elements: + - {label: '37a', rank: 1, name: 'S2'} + options: + yaml_experimental_features: [ 'Enrichment' ] \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml b/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml index 3b86d911df72..fffdf96f463d 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml @@ -16,9 +16,9 @@ # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_sqlite_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_sqlite_database" pipelines: # Jdbc write pipeline @@ -31,25 +31,25 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToJdbc - # config: - # url: "{TEMP_DB}" - # driver_class_name: "org.sqlite.JDBC" - # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + - type: WriteToJdbc + config: + url: "{TEMP_DB}" + driver_class_name: "org.sqlite.JDBC" + query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # # Jdbc read pipeline - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromJdbc - # config: - # url: "{TEMP_DB}" - # driver_class_name: "org.sqlite.JDBC" - # query: "SELECT * FROM tmp_table" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # Jdbc read pipeline + - pipeline: + type: chain + transforms: + - type: ReadFromJdbc + config: + url: "{TEMP_DB}" + driver_class_name: "org.sqlite.JDBC" + query: "SELECT * FROM tmp_table" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} diff --git a/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml b/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml index 4f11b9d0c3b7..01d23989d166 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_oracle_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_oracle_database" pipelines: # Oracle write pipeline @@ -30,34 +30,34 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToOracle - # config: - # url: "{TEMP_DB}" - # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + - type: WriteToOracle + config: + url: "{TEMP_DB}" + query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # # Oracle read pipeline - # # Need a separate read pipeline to make sure the write pipeline is flushed - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromOracle - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # driver_class_name: "oracle.jdbc.OracleDriver" - # - type: MapToFields - # config: - # language: python - # fields: - # value: - # callable: "lambda x: int(x.VALUE)" - # output_type: integer - # rank: - # callable: "lambda x: int(x.RANK)" - # output_type: integer - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # Oracle read pipeline + # Need a separate read pipeline to make sure the write pipeline is flushed + - pipeline: + type: chain + transforms: + - type: ReadFromOracle + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + driver_class_name: "oracle.jdbc.OracleDriver" + - type: MapToFields + config: + language: python + fields: + value: + callable: "lambda x: int(x.VALUE)" + output_type: integer + rank: + callable: "lambda x: int(x.RANK)" + output_type: integer + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} diff --git a/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml b/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml index 183685a13212..8957c782753e 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_postgres_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_postgres_database" pipelines: # Postgres write pipeline @@ -30,40 +30,40 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToPostgres - # config: - # url: "{TEMP_DB}" - # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + - type: WriteToPostgres + config: + url: "{TEMP_DB}" + query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # # Postgres read pipeline with driver_class_name - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromPostgres - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # driver_class_name: "org.postgresql.Driver" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # Postgres read pipeline with driver_class_name + - pipeline: + type: chain + transforms: + - type: ReadFromPostgres + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + driver_class_name: "org.postgresql.Driver" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} - # # Postgres read pipeline without driver_class_name - # # This is to prevent https://github.com/apache/beam/issues/35122 from - # # happening again. - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromPostgres - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} \ No newline at end of file + # Postgres read pipeline without driver_class_name + # This is to prevent https://github.com/apache/beam/issues/35122 from + # happening again. + - pipeline: + type: chain + transforms: + - type: ReadFromPostgres + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml b/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml index af8cb51207fc..41f739ac77e0 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml @@ -15,11 +15,11 @@ # limitations under the License. # -# fixtures: -# - name: PS_TOPIC -# type: "apache_beam.yaml.integration_tests.temp_pubsub_emulator" -# config: -# project_id: "apache-beam-testing" +fixtures: + - name: PS_TOPIC + type: "apache_beam.yaml.integration_tests.temp_pubsub_emulator" + config: + project_id: "apache-beam-testing" pipelines: # Pubsub write pipeline @@ -32,13 +32,13 @@ pipelines: - {value: "11a"} - {value: "37a"} - {value: "389a"} - # - type: WriteToPubSub - # config: - # topic: "{PS_TOPIC}" - # format: "RAW" + - type: WriteToPubSub + config: + topic: "{PS_TOPIC}" + format: "RAW" - # options: - # streaming: true + options: + streaming: true # TODO: Current PubSubIO doesn't have a max_read_time_seconds parameter like diff --git a/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml b/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml index bb4cd4b3abf8..d4345441825a 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml @@ -15,15 +15,15 @@ # limitations under the License. # -# fixtures: -# - name: SPANNER_TABLE -# type: "apache_beam.yaml.integration_tests.temp_spanner_table" -# config: -# project: "apache-beam-testing" -# - name: TEMP_DIR -# type: "apache_beam.yaml.integration_tests.gcs_temp_dir" -# config: -# bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" +fixtures: + - name: SPANNER_TABLE + type: "apache_beam.yaml.integration_tests.temp_spanner_table" + config: + project: "apache-beam-testing" + - name: TEMP_DIR + type: "apache_beam.yaml.integration_tests.gcs_temp_dir" + config: + bucket: "gs://temp-storage-for-end-to-end-tests/temp-it" pipelines: - pipeline: @@ -35,61 +35,61 @@ pipelines: - UserId: "01" Key: "Apple" - # - type: WriteToSpanner - # name: Write - # input: Row - # config: - # project: "{SPANNER_TABLE[0]}" - # instance: "{SPANNER_TABLE[1]}" - # database: "{SPANNER_TABLE[2]}" - # table: "{SPANNER_TABLE[3]}" - # error_handling: - # output: my_error_output + - type: WriteToSpanner + name: Write + input: Row + config: + project: "{SPANNER_TABLE[0]}" + instance: "{SPANNER_TABLE[1]}" + database: "{SPANNER_TABLE[2]}" + table: "{SPANNER_TABLE[3]}" + error_handling: + output: my_error_output - # - type: LogForTesting - # input: Write.my_error_output + - type: LogForTesting + input: Write.my_error_output - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromSpanner - # config: - # project: "{SPANNER_TABLE[0]}" - # instance: "{SPANNER_TABLE[1]}" - # database: "{SPANNER_TABLE[2]}" - # query: 'SELECT * FROM tmp_table where UserId = "01"' + - pipeline: + type: chain + transforms: + - type: ReadFromSpanner + config: + project: "{SPANNER_TABLE[0]}" + instance: "{SPANNER_TABLE[1]}" + database: "{SPANNER_TABLE[2]}" + query: 'SELECT * FROM tmp_table where UserId = "01"' - # - type: AssertEqual - # config: - # elements: - # - UserId: "01" - # Key: "Apple" + - type: AssertEqual + config: + elements: + - UserId: "01" + Key: "Apple" - # - pipeline: - # type: chain - # transforms: - # - type: Create - # config: - # elements: - # - UserId: "02" - # Key: "Mango" + - pipeline: + type: chain + transforms: + - type: Create + config: + elements: + - UserId: "02" + Key: "Mango" - # - type: WriteToSpanner - # config: - # project: "{SPANNER_TABLE[0]}" - # instance: "{SPANNER_TABLE[1]}" - # database: "{SPANNER_TABLE[2]}" - # table: "{SPANNER_TABLE[3]}" + - type: WriteToSpanner + config: + project: "{SPANNER_TABLE[0]}" + instance: "{SPANNER_TABLE[1]}" + database: "{SPANNER_TABLE[2]}" + table: "{SPANNER_TABLE[3]}" - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromSpanner - # config: - # project: "{SPANNER_TABLE[0]}" - # instance: "{SPANNER_TABLE[1]}" - # database: "{SPANNER_TABLE[2]}" - # table: "{SPANNER_TABLE[3]}" - # columns: - # - "{SPANNER_TABLE[4][0]}" - # - "{SPANNER_TABLE[4][1]}" + - pipeline: + type: chain + transforms: + - type: ReadFromSpanner + config: + project: "{SPANNER_TABLE[0]}" + instance: "{SPANNER_TABLE[1]}" + database: "{SPANNER_TABLE[2]}" + table: "{SPANNER_TABLE[3]}" + columns: + - "{SPANNER_TABLE[4][0]}" + - "{SPANNER_TABLE[4][1]}" diff --git a/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml b/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml index 501ca3ffd1a2..9e5ba2ab2ec2 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_sqlserver_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_sqlserver_database" pipelines: # SqlServer write pipeline @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToSqlServer - # config: - # url: "{TEMP_DB}" - # query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" + - type: WriteToSqlServer + config: + url: "{TEMP_DB}" + query: "INSERT INTO tmp_table (value, rank) VALUES(?,?)" - # # SqlServer read pipeline - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromSqlServer - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # driver_class_name: "com.microsoft.sqlserver.jdbc.SQLServerDriver" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # SqlServer read pipeline + - pipeline: + type: chain + transforms: + - type: ReadFromSqlServer + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + driver_class_name: "com.microsoft.sqlserver.jdbc.SQLServerDriver" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml b/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml index 3de5c8d1a737..e6316e4f4a56 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DIR -# type: "tempfile.TemporaryDirectory" +fixtures: + - name: TEMP_DIR + type: "tempfile.TemporaryDirectory" pipelines: - pipeline: @@ -27,40 +27,40 @@ pipelines: config: elements: - {name: "foo"} - # - type: MapToFields - # config: - # language: python - # fields: - # record: - # callable: | - # def process(row): - # return row.name.encode('utf-8') - # output_type: bytes - # - type: WriteToTFRecord - # config: - # file_path_prefix: "{TEMP_DIR}" - # file_name_suffix: ".tfrecords" - # compression_type: "UNCOMPRESSED" - # num_shards: 1 + - type: MapToFields + config: + language: python + fields: + record: + callable: | + def process(row): + return row.name.encode('utf-8') + output_type: bytes + - type: WriteToTFRecord + config: + file_path_prefix: "{TEMP_DIR}" + file_name_suffix: ".tfrecords" + compression_type: "UNCOMPRESSED" + num_shards: 1 - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromTFRecord - # config: - # file_pattern: "{TEMP_DIR}*" - # compression_type: "AUTO" - # validate: true - # - type: MapToFields - # config: - # language: python - # fields: - # record: - # callable: | - # def process(row): - # return row.record.decode('utf-8') - # output_type: string - # - type: AssertEqual - # config: - # elements: - # - {record: "foo"} \ No newline at end of file + - pipeline: + type: chain + transforms: + - type: ReadFromTFRecord + config: + file_pattern: "{TEMP_DIR}*" + compression_type: "AUTO" + validate: true + - type: MapToFields + config: + language: python + fields: + record: + callable: | + def process(row): + return row.record.decode('utf-8') + output_type: string + - type: AssertEqual + config: + elements: + - {record: "foo"} \ No newline at end of file diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 0744f6d6fd8f..d8072f417013 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -430,7 +430,7 @@ def get_portability_package_data(): 'sqlalchemy>=1.3,<3.0', 'psycopg2-binary>=2.8.5,<2.9.10; python_version <= "3.9"', 'psycopg2-binary>=2.8.5,<3.0; python_version >= "3.10"', - 'testcontainers[mysql,kafka]>=4.10.0', + 'testcontainers[mysql,kafka]>=3.0.3,<4.0.0', 'cryptography>=41.0.2', 'hypothesis>5.0.0,<7.0.0', 'virtualenv-clone>=0.5,<1.0', From 88aedfdfdcaf7a6f84215cf829a357f95736db76 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 17:41:26 +0000 Subject: [PATCH 12/35] change mysql image version --- sdks/python/apache_beam/yaml/integration_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index b076656e3200..3fe3e962dd47 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -228,7 +228,7 @@ def temp_mysql_database(): with the MySQL database during setup. Exception: Any other exception encountered during the setup process. """ - with MySqlContainer() as mysql_container: + with MySqlContainer(image="mysql:5.7.17") as mysql_container: try: # Make connection to temp database and create tmp table engine = sqlalchemy.create_engine(mysql_container.get_connection_url()) From f30553ec93360b13f8d93f5478ee09e460a36a51 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 18:20:13 +0000 Subject: [PATCH 13/35] revert image change --- .../yaml/extended_tests/mysql.yaml | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml index 570902b633bb..19c6774b2252 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_mysql_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_mysql_database" pipelines: # MySql write pipeline @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToMySql - # config: - # url: "{TEMP_DB}" - # query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" + - type: WriteToMySql + config: + url: "{TEMP_DB}" + query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" - # # MySql read pipeline - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromMySql - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # driver_class_name: "com.mysql.cj.jdbc.Driver" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # MySql read pipeline + - pipeline: + type: chain + transforms: + - type: ReadFromMySql + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + driver_class_name: "com.mysql.cj.jdbc.Driver" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} \ No newline at end of file From c2593d31a05f0290556df0bda3655386a50ee372 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 18:20:54 +0000 Subject: [PATCH 14/35] revert image change again :) --- sdks/python/apache_beam/yaml/integration_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index 3fe3e962dd47..b076656e3200 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -228,7 +228,7 @@ def temp_mysql_database(): with the MySQL database during setup. Exception: Any other exception encountered during the setup process. """ - with MySqlContainer(image="mysql:5.7.17") as mysql_container: + with MySqlContainer() as mysql_container: try: # Make connection to temp database and create tmp table engine = sqlalchemy.create_engine(mysql_container.get_connection_url()) From ceeee21066fca4787583061e031bdf626c7fcb03 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 18:22:53 +0000 Subject: [PATCH 15/35] add comments to mysql again --- .../yaml/extended_tests/mysql.yaml | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml index 19c6774b2252..570902b633bb 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -fixtures: - - name: TEMP_DB - type: "apache_beam.yaml.integration_tests.temp_mysql_database" +# fixtures: +# - name: TEMP_DB +# type: "apache_beam.yaml.integration_tests.temp_mysql_database" pipelines: # MySql write pipeline @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - - type: WriteToMySql - config: - url: "{TEMP_DB}" - query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" + # - type: WriteToMySql + # config: + # url: "{TEMP_DB}" + # query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" - # MySql read pipeline - - pipeline: - type: chain - transforms: - - type: ReadFromMySql - config: - url: "{TEMP_DB}" - query: "SELECT * FROM tmp_table" - driver_class_name: "com.mysql.cj.jdbc.Driver" - - type: AssertEqual - config: - elements: - - {value: 123, rank: 0} - - {value: 456, rank: 1} - - {value: 789, rank: 2} + # # MySql read pipeline + # - pipeline: + # type: chain + # transforms: + # - type: ReadFromMySql + # config: + # url: "{TEMP_DB}" + # query: "SELECT * FROM tmp_table" + # driver_class_name: "com.mysql.cj.jdbc.Driver" + # - type: AssertEqual + # config: + # elements: + # - {value: 123, rank: 0} + # - {value: 456, rank: 1} + # - {value: 789, rank: 2} \ No newline at end of file From f6138ecb9f83a7e8de035bf71e801e705ad43aee Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:13:22 +0000 Subject: [PATCH 16/35] shift post commit files to different folders --- .../extended_tests/{ => data}/enrichment.yaml | 0 .../extended_tests/{ => data}/tfrecord.yaml | 0 .../{ => databases}/bigquery.yaml | 0 .../extended_tests/{ => databases}/jdbc.yaml | 0 .../extended_tests/{ => databases}/mysql.yaml | 38 +++++++++---------- .../{ => databases}/oracle.yaml | 0 .../{ => databases}/postgres.yaml | 0 .../{ => databases}/spanner.yaml | 0 .../{ => databases}/sqlserver.yaml | 0 .../extended_tests/{ => messaging}/kafka.yaml | 0 .../{ => messaging}/pubsub.yaml | 0 11 files changed, 19 insertions(+), 19 deletions(-) rename sdks/python/apache_beam/yaml/extended_tests/{ => data}/enrichment.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => data}/tfrecord.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/bigquery.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/jdbc.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/mysql.yaml (65%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/oracle.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/postgres.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/spanner.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => databases}/sqlserver.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => messaging}/kafka.yaml (100%) rename sdks/python/apache_beam/yaml/extended_tests/{ => messaging}/pubsub.yaml (100%) diff --git a/sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml b/sdks/python/apache_beam/yaml/extended_tests/data/enrichment.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/enrichment.yaml rename to sdks/python/apache_beam/yaml/extended_tests/data/enrichment.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml b/sdks/python/apache_beam/yaml/extended_tests/data/tfrecord.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/tfrecord.yaml rename to sdks/python/apache_beam/yaml/extended_tests/data/tfrecord.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/bigquery.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/bigquery.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/bigquery.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/jdbc.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/jdbc.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/jdbc.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml similarity index 65% rename from sdks/python/apache_beam/yaml/extended_tests/mysql.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml index 570902b633bb..d2173d9d1b1d 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/mysql.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml @@ -30,24 +30,24 @@ pipelines: - {value: 123, rank: 0} - {value: 456, rank: 1} - {value: 789, rank: 2} - # - type: WriteToMySql - # config: - # url: "{TEMP_DB}" - # query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" + - type: WriteToMySql + config: + url: "{TEMP_DB}" + query: "INSERT INTO tmp_table (value, `rank`) VALUES(?,?)" - # # MySql read pipeline - # - pipeline: - # type: chain - # transforms: - # - type: ReadFromMySql - # config: - # url: "{TEMP_DB}" - # query: "SELECT * FROM tmp_table" - # driver_class_name: "com.mysql.cj.jdbc.Driver" - # - type: AssertEqual - # config: - # elements: - # - {value: 123, rank: 0} - # - {value: 456, rank: 1} - # - {value: 789, rank: 2} + # MySql read pipeline + - pipeline: + type: chain + transforms: + - type: ReadFromMySql + config: + url: "{TEMP_DB}" + query: "SELECT * FROM tmp_table" + driver_class_name: "com.mysql.cj.jdbc.Driver" + - type: AssertEqual + config: + elements: + - {value: 123, rank: 0} + - {value: 456, rank: 1} + - {value: 789, rank: 2} \ No newline at end of file diff --git a/sdks/python/apache_beam/yaml/extended_tests/oracle.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/oracle.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/oracle.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/oracle.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/postgres.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/postgres.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/postgres.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/postgres.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/spanner.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/spanner.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/spanner.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/spanner.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/sqlserver.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/sqlserver.yaml rename to sdks/python/apache_beam/yaml/extended_tests/databases/sqlserver.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/kafka.yaml b/sdks/python/apache_beam/yaml/extended_tests/messaging/kafka.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/kafka.yaml rename to sdks/python/apache_beam/yaml/extended_tests/messaging/kafka.yaml diff --git a/sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml b/sdks/python/apache_beam/yaml/extended_tests/messaging/pubsub.yaml similarity index 100% rename from sdks/python/apache_beam/yaml/extended_tests/pubsub.yaml rename to sdks/python/apache_beam/yaml/extended_tests/messaging/pubsub.yaml From bbf5b6229ca098d52f98c35ed91791252ba22cf4 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:14:14 +0000 Subject: [PATCH 17/35] rename to Data version --- ...n => beam_PostCommit_Yaml_Data_Xlang_Direct.json} | 2 +- ...ml => beam_PostCommit_Yaml_Data_Xlang_Direct.yml} | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) rename .github/trigger_files/{beam_PostCommit_Yaml_Xlang_Direct.json => beam_PostCommit_Yaml_Data_Xlang_Direct.json} (84%) rename .github/workflows/{beam_PostCommit_Yaml_Xlang_Direct.yml => beam_PostCommit_Yaml_Data_Xlang_Direct.yml} (90%) diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json similarity index 84% rename from .github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json rename to .github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json index 541dc4ea8e87..a975cd1cd104 100644 --- a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json +++ b/.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 2 + "revision": 1 } diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml similarity index 90% rename from .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml rename to .github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml index 97b56a31ef69..112231de5908 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: PostCommit YAML Xlang Direct +name: PostCommit YAML Data Xlang Direct on: schedule: - cron: '30 5/6 * * *' pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json'] + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json'] workflow_dispatch: #Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event @@ -59,8 +59,8 @@ jobs: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: - job_name: ["beam_PostCommit_Yaml_Xlang_Direct"] - job_phrase: ["Run Yaml_Xlang_Direct PostCommit"] + job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] + job_phrase: ["Run Yaml_Data_Xlang_Direct PostCommit"] steps: - uses: actions/checkout@v4 - name: Setup repository @@ -76,10 +76,10 @@ jobs: java-version: '11' - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Xlang Direct script + - name: run PostCommit Yaml Data Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:postCommitYamlIntegrationTests -PbeamPythonExtra=ml_test + gradle-command: :sdks:python:postCommitYamlDataIntegrationTests -PbeamPythonExtra=ml_test - name: Archive Python Test Results uses: actions/upload-artifact@v4 if: failure() From 364bd007a8bbe6bdc524def4051eed0142731f2e Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:14:40 +0000 Subject: [PATCH 18/35] add databases version --- ...ostCommit_Yaml_Databases_Xlang_Direct.json | 4 + ...PostCommit_Yaml_Databases_Xlang_Direct.yml | 96 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json create mode 100644 .github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json new file mode 100644 index 000000000000..a975cd1cd104 --- /dev/null +++ b/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json @@ -0,0 +1,4 @@ +{ + "comment": "Modify this file in a trivial way to cause this test suite to run", + "revision": 1 +} diff --git a/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml new file mode 100644 index 000000000000..0a25b09e44fd --- /dev/null +++ b/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit YAML Databases Xlang Direct + +on: + schedule: + - cron: '30 5/6 * * *' + pull_request_target: + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json'] + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' + cancel-in-progress: true + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Yaml_Xlang_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + runs-on: ubuntu-latest + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Yaml_Databases_Xlang_Direct"] + job_phrase: ["Run Yaml_Databases_Xlang_Direct PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: default + java-version: '11' + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: run PostCommit Yaml Databases Xlang Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:postCommitYamlDatabasesIntegrationTests -PbeamPythonExtra=ml_test + - name: Archive Python Test Results + uses: actions/upload-artifact@v4 + if: failure() + with: + name: Python Test Results + path: '**/pytest*.xml' + - name: Publish Python Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/pytest*.xml' + large_files: true \ No newline at end of file From d525d7a42f9974d532835f4482deb9eeab7e1de6 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:14:56 +0000 Subject: [PATCH 19/35] add messaging version --- ...ostCommit_Yaml_Messaging_Xlang_Direct.json | 4 + ...PostCommit_Yaml_Messaging_Xlang_Direct.yml | 96 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json create mode 100644 .github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json new file mode 100644 index 000000000000..a975cd1cd104 --- /dev/null +++ b/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json @@ -0,0 +1,4 @@ +{ + "comment": "Modify this file in a trivial way to cause this test suite to run", + "revision": 1 +} diff --git a/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml new file mode 100644 index 000000000000..2dcd24bba8ed --- /dev/null +++ b/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit YAML Messaging Xlang Direct + +on: + schedule: + - cron: '30 5/6 * * *' + pull_request_target: + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json'] + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' + cancel-in-progress: true + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Yaml_Xlang_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + runs-on: ubuntu-latest + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Yaml_Messaging_Xlang_Direct"] + job_phrase: ["Run Yaml_Messaging_Xlang_Direct PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: default + java-version: '11' + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: run PostCommit Yaml Messaging Xlang Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test + - name: Archive Python Test Results + uses: actions/upload-artifact@v4 + if: failure() + with: + name: Python Test Results + path: '**/pytest*.xml' + - name: Publish Python Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/pytest*.xml' + large_files: true \ No newline at end of file From 3cd913df0baa4eff14eb3c6ccafaaccb59cf34db Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:15:22 +0000 Subject: [PATCH 20/35] update readme for three post commits --- .github/workflows/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index a7cdf99d53cb..ce8d5ef6e6b5 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -401,7 +401,9 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`beam_PostCommit_XVR_Samza.json`| [![.github/workflows/beam_PostCommit_XVR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml?query=event%3Aschedule) | | [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`beam_PostCommit_XVR_Spark3.json`| [![.github/workflows/beam_PostCommit_XVR_Spark3.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml?query=event%3Aschedule) | -| [ PostCommit YAML Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml?query=event%3Aschedule) | +| [ PostCommit YAML Data Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Data_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml?query=event%3Aschedule) | +| [ PostCommit YAML Databases Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Databases_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml?query=event%3Aschedule) | +| [ PostCommit YAML Messaging Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Messaging_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml?query=event%3Aschedule) | | [ Python Validates Container Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml) | ['3.8','3.9','3.10','3.11'] |`beam_Python_ValidatesContainer_Dataflow_ARM.json`|[![.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml?query=event%3Aschedule) | ### PerformanceTests and Benchmark Jobs From d10571720655208b371763fdffc4ec13337ea389 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:16:27 +0000 Subject: [PATCH 21/35] update gradle with new post commits --- sdks/python/build.gradle | 46 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 771b413aa8b0..e1073e0fc994 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -138,13 +138,13 @@ tasks.register("yamlIntegrationTests") { doLast { exec { executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0" + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py" } } } -tasks.register("postCommitYamlIntegrationTests") { - description "Runs postcommit integration tests for yaml pipelines." +tasks.register("postCommitYamlDataIntegrationTests") { + description "Runs postcommit integration tests for yaml pipelines dealing with data formats or processing." dependsOn installGcpTest // Need to build all expansion services referenced in apache_beam/yaml/*.* @@ -157,7 +157,45 @@ tasks.register("postCommitYamlIntegrationTests") { doLast { exec { executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='extended_tests' --timeout=1200" + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='extended_tests/data'" + } + } +} + +tasks.register("postCommitYamlDatabasesIntegrationTests") { + description "Runs postcommit integration tests for yaml pipelines dealing with databases." + + dependsOn installGcpTest + // Need to build all expansion services referenced in apache_beam/yaml/*.* + // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq + dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" + dependsOn ":sdks:java:extensions:sql:expansion-service:shadowJar" + dependsOn ":sdks:java:io:expansion-service:build" + dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" + + doLast { + exec { + executable 'sh' + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='extended_tests/databases'" + } + } +} + +tasks.register("postCommitYamlMessagingIntegrationTests") { + description "Runs postcommit integration tests for yaml pipelines dealing with messaging." + + dependsOn installGcpTest + // Need to build all expansion services referenced in apache_beam/yaml/*.* + // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq + dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" + dependsOn ":sdks:java:extensions:sql:expansion-service:shadowJar" + dependsOn ":sdks:java:io:expansion-service:build" + dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" + + doLast { + exec { + executable 'sh' + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='extended_tests/messaging'" } } } From dae1836c21ae5063b2e6e9957d6dc86990e7368a Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 20:45:49 +0000 Subject: [PATCH 22/35] fix job names --- .github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml | 2 +- .../workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml | 2 +- .../workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml index 112231de5908..b7d3c881304d 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml @@ -49,7 +49,7 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Yaml_Xlang_Direct: + beam_PostCommit_Yaml_Data_Xlang_Direct: if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || diff --git a/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml index 0a25b09e44fd..1ffac12e867b 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml @@ -49,7 +49,7 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Yaml_Xlang_Direct: + beam_PostCommit_Yaml_Databases_Xlang_Direct: if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || diff --git a/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml index 2dcd24bba8ed..567aadff2bdf 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml @@ -49,7 +49,7 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Yaml_Xlang_Direct: + beam_PostCommit_Yaml_Messaging_Xlang_Direct: if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || From 2b6c2b64dd11c2d2e8f1fa30b61672cde94c3098 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Tue, 10 Jun 2025 21:03:02 +0000 Subject: [PATCH 23/35] uncomment fixture on mysql --- .../apache_beam/yaml/extended_tests/databases/mysql.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml b/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml index d2173d9d1b1d..19c6774b2252 100644 --- a/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml +++ b/sdks/python/apache_beam/yaml/extended_tests/databases/mysql.yaml @@ -15,9 +15,9 @@ # limitations under the License. # -# fixtures: -# - name: TEMP_DB -# type: "apache_beam.yaml.integration_tests.temp_mysql_database" +fixtures: + - name: TEMP_DB + type: "apache_beam.yaml.integration_tests.temp_mysql_database" pipelines: # MySql write pipeline From 0912a361a00d1e44c6a85dd87bde144e228ef596 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 14:55:41 +0000 Subject: [PATCH 24/35] switch back to one workflow and update readme as such --- .../beam_PostCommit_Yaml_Xlang_Direct.json | 4 + .github/workflows/README.md | 4 +- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 106 ++++++++++++++++++ 3 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json create mode 100644 .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json new file mode 100644 index 000000000000..541dc4ea8e87 --- /dev/null +++ b/.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json @@ -0,0 +1,4 @@ +{ + "comment": "Modify this file in a trivial way to cause this test suite to run", + "revision": 2 +} diff --git a/.github/workflows/README.md b/.github/workflows/README.md index ce8d5ef6e6b5..a7cdf99d53cb 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -401,9 +401,7 @@ PostCommit Jobs run in a schedule against master branch and generally do not get | [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml?query=event%3Aschedule) | | [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`beam_PostCommit_XVR_Samza.json`| [![.github/workflows/beam_PostCommit_XVR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml?query=event%3Aschedule) | | [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`beam_PostCommit_XVR_Spark3.json`| [![.github/workflows/beam_PostCommit_XVR_Spark3.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml?query=event%3Aschedule) | -| [ PostCommit YAML Data Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Data_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml?query=event%3Aschedule) | -| [ PostCommit YAML Databases Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Databases_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml?query=event%3Aschedule) | -| [ PostCommit YAML Messaging Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Messaging_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml?query=event%3Aschedule) | +| [ PostCommit YAML Xlang Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml) | N/A |`beam_PostCommit_Yaml_Xlang_Direct.json`| [![.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml?query=event%3Aschedule) | | [ Python Validates Container Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml) | ['3.8','3.9','3.10','3.11'] |`beam_Python_ValidatesContainer_Dataflow_ARM.json`|[![.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml?query=event%3Aschedule) | ### PerformanceTests and Benchmark Jobs diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml new file mode 100644 index 000000000000..927df414d12e --- /dev/null +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit YAML Xlang Direct + +on: + schedule: + - cron: '30 5/6 * * *' + pull_request_target: + paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Xlang_Direct.json'] + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' + cancel-in-progress: true + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Yaml_Xlang_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + runs-on: ubuntu-latest + timeout-minutes: 120 # Adjusted timeout for potentially longer total run time + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: + - "beam_PostCommit_Yaml_Data_Xlang_Direct" + - "beam_PostCommit_Yaml_Messaging_Xlang_Direct" + - "beam_PostCommit_Yaml_Databases_Xlang_Direct" + job_phrase: + - "Run Yaml_Data_Xlang_Direct PostCommit" + - "Run Yaml_Messaging_Xlang_Direct PostCommit" + - "Run Yaml_Databases_Xlang_Direct PostCommit" + gradle_command: + - ":sdks:python:postCommitYamlDataIntegrationTests -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlDatabasesIntegrationTests -PbeamPythonExtra=ml_test" + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: default + java-version: '11' + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: run PostCommit Yaml Xlang Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: ${{ matrix.gradle_command }} + - name: Archive Python Test Results + uses: actions/upload-artifact@v4 + if: failure() + with: + name: Python Test Results + path: '**/pytest*.xml' + - name: Publish Python Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/pytest*.xml' + large_files: true \ No newline at end of file From ba4eb6f84d35374b7e98b5a1dabfd1a9faefc0fa Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 14:56:21 +0000 Subject: [PATCH 25/35] remove old workflow files --- ...eam_PostCommit_Yaml_Data_Xlang_Direct.json | 4 - ...ostCommit_Yaml_Databases_Xlang_Direct.json | 4 - ...ostCommit_Yaml_Messaging_Xlang_Direct.json | 4 - ...beam_PostCommit_Yaml_Data_Xlang_Direct.yml | 96 ------------------- ...PostCommit_Yaml_Databases_Xlang_Direct.yml | 96 ------------------- ...PostCommit_Yaml_Messaging_Xlang_Direct.yml | 96 ------------------- 6 files changed, 300 deletions(-) delete mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json delete mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json delete mode 100644 .github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json delete mode 100644 .github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml delete mode 100644 .github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml delete mode 100644 .github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json deleted file mode 100644 index a975cd1cd104..000000000000 --- a/.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 1 -} diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json deleted file mode 100644 index a975cd1cd104..000000000000 --- a/.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 1 -} diff --git a/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json b/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json deleted file mode 100644 index a975cd1cd104..000000000000 --- a/.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "comment": "Modify this file in a trivial way to cause this test suite to run", - "revision": 1 -} diff --git a/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml deleted file mode 100644 index b7d3c881304d..000000000000 --- a/.github/workflows/beam_PostCommit_Yaml_Data_Xlang_Direct.yml +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit YAML Data Xlang Direct - -on: - schedule: - - cron: '30 5/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Data_Xlang_Direct.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Yaml_Data_Xlang_Direct: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: ubuntu-latest - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] - job_phrase: ["Run Yaml_Data_Xlang_Direct PostCommit"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - with: - python-version: default - java-version: '11' - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Data Xlang Direct script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :sdks:python:postCommitYamlDataIntegrationTests -PbeamPythonExtra=ml_test - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml deleted file mode 100644 index 1ffac12e867b..000000000000 --- a/.github/workflows/beam_PostCommit_Yaml_Databases_Xlang_Direct.yml +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit YAML Databases Xlang Direct - -on: - schedule: - - cron: '30 5/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Databases_Xlang_Direct.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Yaml_Databases_Xlang_Direct: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: ubuntu-latest - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Yaml_Databases_Xlang_Direct"] - job_phrase: ["Run Yaml_Databases_Xlang_Direct PostCommit"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - with: - python-version: default - java-version: '11' - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Databases Xlang Direct script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :sdks:python:postCommitYamlDatabasesIntegrationTests -PbeamPythonExtra=ml_test - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml deleted file mode 100644 index 567aadff2bdf..000000000000 --- a/.github/workflows/beam_PostCommit_Yaml_Messaging_Xlang_Direct.yml +++ /dev/null @@ -1,96 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: PostCommit YAML Messaging Xlang Direct - -on: - schedule: - - cron: '30 5/6 * * *' - pull_request_target: - paths: ['release/trigger_all_tests.json', '.github/trigger_files/beam_PostCommit_Yaml_Messaging_Xlang_Direct.json'] - workflow_dispatch: - -#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event -permissions: - actions: write - pull-requests: write - checks: write - contents: read - deployments: read - id-token: none - issues: write - discussions: read - packages: read - pages: read - repository-projects: read - security-events: read - statuses: read - -# This allows a subsequently queued workflow run to interrupt previous runs -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}' - cancel-in-progress: true - -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - -jobs: - beam_PostCommit_Yaml_Messaging_Xlang_Direct: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: ubuntu-latest - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Yaml_Messaging_Xlang_Direct"] - job_phrase: ["Run Yaml_Messaging_Xlang_Direct PostCommit"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - with: - python-version: default - java-version: '11' - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Messaging Xlang Direct script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: :sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true \ No newline at end of file From c193e678111aa9001f26865bda92a2092aa0ed5f Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 14:58:51 +0000 Subject: [PATCH 26/35] update order and remove comment --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 927df414d12e..2667ca7ba46e 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -55,22 +55,22 @@ jobs: github.event_name == 'pull_request_target' || (github.event_name == 'schedule' && github.repository == 'apache/beam') runs-on: ubuntu-latest - timeout-minutes: 120 # Adjusted timeout for potentially longer total run time + timeout-minutes: 120 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: job_name: - "beam_PostCommit_Yaml_Data_Xlang_Direct" - - "beam_PostCommit_Yaml_Messaging_Xlang_Direct" - "beam_PostCommit_Yaml_Databases_Xlang_Direct" + - "beam_PostCommit_Yaml_Messaging_Xlang_Direct" job_phrase: - "Run Yaml_Data_Xlang_Direct PostCommit" - - "Run Yaml_Messaging_Xlang_Direct PostCommit" - "Run Yaml_Databases_Xlang_Direct PostCommit" + - "Run Yaml_Messaging_Xlang_Direct PostCommit" gradle_command: - ":sdks:python:postCommitYamlDataIntegrationTests -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test" - ":sdks:python:postCommitYamlDatabasesIntegrationTests -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test" steps: - uses: actions/checkout@v4 - name: Setup repository From bc7d86e4e4bbaa50305c843e3660d896b789314e Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 15:18:50 +0000 Subject: [PATCH 27/35] update gradle with parameterized options --- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 6 +- sdks/python/build.gradle | 57 ++++++------------- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 2667ca7ba46e..f0817f6bd1d2 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -68,9 +68,9 @@ jobs: - "Run Yaml_Databases_Xlang_Direct PostCommit" - "Run Yaml_Messaging_Xlang_Direct PostCommit" gradle_command: - - ":sdks:python:postCommitYamlDataIntegrationTests -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlDatabasesIntegrationTests -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlMessagingIntegrationTests -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlDataIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlDatabasesIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlMessagingIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test" steps: - uses: actions/checkout@v4 - name: Setup repository diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index e1073e0fc994..91b15e259cdb 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -143,8 +143,8 @@ tasks.register("yamlIntegrationTests") { } } -tasks.register("postCommitYamlDataIntegrationTests") { - description "Runs postcommit integration tests for yaml pipelines dealing with data formats or processing." +tasks.register("postCommitYamlIntegrationTests") { + description "Runs postcommit integration tests for yaml pipelines - parameterized by yamlTestSet." dependsOn installGcpTest // Need to build all expansion services referenced in apache_beam/yaml/*.* @@ -155,47 +155,24 @@ tasks.register("postCommitYamlDataIntegrationTests") { dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" doLast { - exec { - executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='extended_tests/data'" + def testSet = project.findProperty('yamlTestSet') ?: 'data' + def test_files_dir + switch (testSet) { + case 'data': + test_files_dir = 'extended_tests/data' + break + case 'databases': + test_files_dir = 'extended_tests/databases' + break + case 'messaging': + test_files_dir = 'extended_tests/messaging' + break + default: + throw StopExecutionException("Unknown yamlTestSet: ${testSet}. Must be one of 'data', 'databases', or 'messaging'.") } - } -} - -tasks.register("postCommitYamlDatabasesIntegrationTests") { - description "Runs postcommit integration tests for yaml pipelines dealing with databases." - - dependsOn installGcpTest - // Need to build all expansion services referenced in apache_beam/yaml/*.* - // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq - dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" - dependsOn ":sdks:java:extensions:sql:expansion-service:shadowJar" - dependsOn ":sdks:java:io:expansion-service:build" - dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" - - doLast { - exec { - executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='extended_tests/databases'" - } - } -} - -tasks.register("postCommitYamlMessagingIntegrationTests") { - description "Runs postcommit integration tests for yaml pipelines dealing with messaging." - - dependsOn installGcpTest - // Need to build all expansion services referenced in apache_beam/yaml/*.* - // grep -oh 'sdk.*Jar' sdks/python/apache_beam/yaml/*.yaml | sort | uniq - dependsOn ":sdks:java:extensions:schemaio-expansion-service:shadowJar" - dependsOn ":sdks:java:extensions:sql:expansion-service:shadowJar" - dependsOn ":sdks:java:io:expansion-service:build" - dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" - - doLast { exec { executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='extended_tests/messaging'" + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='${test_files_dir}'" } } } From 027dee21f75944d5d104b884ca82d0344d1cc1f6 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 15:49:27 +0000 Subject: [PATCH 28/35] update gradle command calls to correct location --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index f0817f6bd1d2..09e132e4a8fe 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -68,9 +68,9 @@ jobs: - "Run Yaml_Databases_Xlang_Direct PostCommit" - "Run Yaml_Messaging_Xlang_Direct PostCommit" gradle_command: - - ":sdks:python:postCommitYamlDataIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlDatabasesIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlMessagingIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test" + - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test" steps: - uses: actions/checkout@v4 - name: Setup repository From 19de3bcfe7acad3b4d9b52d79877355ecbcc8ce1 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 16:21:21 +0000 Subject: [PATCH 29/35] update workflow to three jobs explicit --- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 111 +++++++++++++++--- 1 file changed, 97 insertions(+), 14 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 09e132e4a8fe..6445a0f0d26a 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -49,28 +49,111 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Yaml_Xlang_Direct: + beam_PostCommit_Yaml_Data_Xlang_Direct: if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || (github.event_name == 'schedule' && github.repository == 'apache/beam') runs-on: ubuntu-latest - timeout-minutes: 120 + timeout-minutes: 100 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: - job_name: - - "beam_PostCommit_Yaml_Data_Xlang_Direct" - - "beam_PostCommit_Yaml_Databases_Xlang_Direct" - - "beam_PostCommit_Yaml_Messaging_Xlang_Direct" - job_phrase: - - "Run Yaml_Data_Xlang_Direct PostCommit" - - "Run Yaml_Databases_Xlang_Direct PostCommit" - - "Run Yaml_Messaging_Xlang_Direct PostCommit" - gradle_command: - - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test" - - ":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test" + job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] + job_phrase: ["Run Yaml_Data_Xlang_Direct PostCommit"] + gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: default + java-version: '11' + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: run PostCommit Yaml Xlang Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: ${{ matrix.gradle_command }} + - name: Archive Python Test Results + uses: actions/upload-artifact@v4 + if: failure() + with: + name: Python Test Results + path: '**/pytest*.xml' + - name: Publish Python Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/pytest*.xml' + large_files: true + beam_PostCommit_Yaml_Databases_Xlang_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + runs-on: ubuntu-latest + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Yaml_Databases_Xlang_Direct"] + job_phrase: ["Run Yaml_Databases_Xlang_Direct PostCommit"] + gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: default + java-version: '11' + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: run PostCommit Yaml Xlang Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: ${{ matrix.gradle_command }} + - name: Archive Python Test Results + uses: actions/upload-artifact@v4 + if: failure() + with: + name: Python Test Results + path: '**/pytest*.xml' + - name: Publish Python Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/pytest*.xml' + large_files: true + beam_PostCommit_Yaml_Messaging_Xlang_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request_target' || + (github.event_name == 'schedule' && github.repository == 'apache/beam') + runs-on: ubuntu-latest + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Yaml_Messaging_Xlang_Direct"] + job_phrase: ["Run Yaml_Messaging_Xlang_Direct PostCommit"] + gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test"] steps: - uses: actions/checkout@v4 - name: Setup repository From 28d35c1b365eb4becc626e91bc9e4ff570a1f7d3 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 17:18:27 +0000 Subject: [PATCH 30/35] add back Bigquery deselect --- sdks/python/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 91b15e259cdb..14c364817146 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -172,7 +172,7 @@ tasks.register("postCommitYamlIntegrationTests") { } exec { executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --test_files_dir='${test_files_dir}'" + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='${test_files_dir}'" } } } From 72ab27b98f8ad24996c1287c752412a0228a6e91 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 18:04:14 +0000 Subject: [PATCH 31/35] fix mysql teardown error --- sdks/python/apache_beam/yaml/integration_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/yaml/integration_tests.py b/sdks/python/apache_beam/yaml/integration_tests.py index b076656e3200..6824105737a8 100644 --- a/sdks/python/apache_beam/yaml/integration_tests.py +++ b/sdks/python/apache_beam/yaml/integration_tests.py @@ -228,7 +228,7 @@ def temp_mysql_database(): with the MySQL database during setup. Exception: Any other exception encountered during the setup process. """ - with MySqlContainer() as mysql_container: + with MySqlContainer(init=True) as mysql_container: try: # Make connection to temp database and create tmp table engine = sqlalchemy.create_engine(mysql_container.get_connection_url()) From 7d732c95d28be9a814a0cc095d0cf622d1705854 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 19:30:17 +0000 Subject: [PATCH 32/35] Simplify down to one from three explicit jobs --- .../beam_PostCommit_Yaml_Xlang_Direct.yml | 103 +----------------- sdks/python/build.gradle | 40 ++++--- 2 files changed, 28 insertions(+), 115 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 6445a0f0d26a..4e62834e1c8c 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -60,16 +60,16 @@ jobs: strategy: matrix: job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] - job_phrase: ["Run Yaml_Data_Xlang_Direct PostCommit"] - gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=data -PbeamPythonExtra=ml_test"] + job_phrase: ["Run Yaml_Xlang_Direct PostCommit"] + test_set: ["data", databases", "messaging"] steps: - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.test_set }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + github_job: ${{ matrix.job_name }} ${{ matrix.test_set }} (${{ matrix.job_phrase }} ${{ matrix.test_set }}) - name: Setup environment uses: ./.github/actions/setup-environment-action with: @@ -80,7 +80,7 @@ jobs: - name: run PostCommit Yaml Xlang Direct script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: ${{ matrix.gradle_command }} + gradle-command: :sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=${{ matrix.test_set }} -PbeamPythonExtra=ml_test - name: Archive Python Test Results uses: actions/upload-artifact@v4 if: failure() @@ -95,95 +95,4 @@ jobs: comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/pytest*.xml' large_files: true - beam_PostCommit_Yaml_Databases_Xlang_Direct: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: ubuntu-latest - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Yaml_Databases_Xlang_Direct"] - job_phrase: ["Run Yaml_Databases_Xlang_Direct PostCommit"] - gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=databases -PbeamPythonExtra=ml_test"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - with: - python-version: default - java-version: '11' - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Xlang Direct script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: ${{ matrix.gradle_command }} - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true - beam_PostCommit_Yaml_Messaging_Xlang_Direct: - if: | - github.event_name == 'workflow_dispatch' || - github.event_name == 'pull_request_target' || - (github.event_name == 'schedule' && github.repository == 'apache/beam') - runs-on: ubuntu-latest - timeout-minutes: 100 - name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - strategy: - matrix: - job_name: ["beam_PostCommit_Yaml_Messaging_Xlang_Direct"] - job_phrase: ["Run Yaml_Messaging_Xlang_Direct PostCommit"] - gradle_command: [":sdks:python:postCommitYamlIntegrationTests -PyamlTestSet=messaging -PbeamPythonExtra=ml_test"] - steps: - - uses: actions/checkout@v4 - - name: Setup repository - uses: ./.github/actions/setup-action - with: - comment_phrase: ${{ matrix.job_phrase }} - github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup environment - uses: ./.github/actions/setup-environment-action - with: - python-version: default - java-version: '11' - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v2 - - name: run PostCommit Yaml Xlang Direct script - uses: ./.github/actions/gradle-command-self-hosted-action - with: - gradle-command: ${{ matrix.gradle_command }} - - name: Archive Python Test Results - uses: actions/upload-artifact@v4 - if: failure() - with: - name: Python Test Results - path: '**/pytest*.xml' - - name: Publish Python Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - if: always() - with: - commit: '${{ env.prsha || env.GITHUB_SHA }}' - comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} - files: '**/pytest*.xml' - large_files: true \ No newline at end of file + diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 14c364817146..c8f02262d3fd 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -155,24 +155,28 @@ tasks.register("postCommitYamlIntegrationTests") { dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" doLast { - def testSet = project.findProperty('yamlTestSet') ?: 'data' - def test_files_dir - switch (testSet) { - case 'data': - test_files_dir = 'extended_tests/data' - break - case 'databases': - test_files_dir = 'extended_tests/databases' - break - case 'messaging': - test_files_dir = 'extended_tests/messaging' - break - default: - throw StopExecutionException("Unknown yamlTestSet: ${testSet}. Must be one of 'data', 'databases', or 'messaging'.") - } - exec { - executable 'sh' - args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='${test_files_dir}'" + def testSetInput = project.findProperty('yamlTestSet') ?: 'data,databases,messaging' + def testSetsToRun = testSetInput.tokenize(',').collect { it.trim() }.findAll { !it.isEmpty() } + testSetsToRun.each { currentTestSet -> + def test_files_dir + + switch (currentTestSet) { + case 'data': + test_files_dir = 'extended_tests/data' + break + case 'databases': + test_files_dir = 'extended_tests/databases' + break + case 'messaging': + test_files_dir = 'extended_tests/messaging' + break + default: + throw StopExecutionException("Unknown yamlTestSet: ${testSet}. Must be one of 'data', 'databases', or 'messaging'.") + } + exec { + executable 'sh' + args '-c', "${envdir}/bin/pytest -v apache_beam/yaml/integration_tests.py --deselect apache_beam/yaml/integration_tests.py::BigqueryTest::test_ReadFromBigQuery_ExternalJavaProvider_0 --test_files_dir='${test_files_dir}'" + } } } } From 4783bdaf0203849472573c32465899a66124d02c Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 19:39:32 +0000 Subject: [PATCH 33/35] remove tab --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 4e62834e1c8c..78a537b25534 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -61,7 +61,7 @@ jobs: matrix: job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] job_phrase: ["Run Yaml_Xlang_Direct PostCommit"] - test_set: ["data", databases", "messaging"] + test_set: ["data", databases", "messaging"] steps: - uses: actions/checkout@v4 - name: Setup repository From 6bdf8b47417f80e44f270d4a99750bcba0d11cb7 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 19:48:49 +0000 Subject: [PATCH 34/35] remove Data --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index 78a537b25534..f643a240ce32 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -49,7 +49,7 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Yaml_Data_Xlang_Direct: + beam_PostCommit_Yaml_Xlang_Direct: if: | github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target' || @@ -59,7 +59,7 @@ jobs: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: - job_name: ["beam_PostCommit_Yaml_Data_Xlang_Direct"] + job_name: ["beam_PostCommit_Yaml_Xlang_Direct"] job_phrase: ["Run Yaml_Xlang_Direct PostCommit"] test_set: ["data", databases", "messaging"] steps: From f5c5ca9d927c5116932433eb687d7d5d48858b95 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Wed, 11 Jun 2025 20:02:41 +0000 Subject: [PATCH 35/35] fix parsing parameters --- .github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml index f643a240ce32..1f3d51475664 100644 --- a/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml +++ b/.github/workflows/beam_PostCommit_Yaml_Xlang_Direct.yml @@ -61,7 +61,7 @@ jobs: matrix: job_name: ["beam_PostCommit_Yaml_Xlang_Direct"] job_phrase: ["Run Yaml_Xlang_Direct PostCommit"] - test_set: ["data", databases", "messaging"] + test_set: ["data", "databases", "messaging"] steps: - uses: actions/checkout@v4 - name: Setup repository