From 9bd7aa0d3e40a6edde76c1b20ced683e1c521885 Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Tue, 20 Sep 2022 13:24:45 -0500
Subject: [PATCH 1/7] Migrate and sharding Python Postcommit ML from Jenkins to GA

---
Review notes (text between the three-dash line and the diffstat is
commentary, not part of the commit message):
* Adds a workflow triggered by a cron schedule ('0 */6 * * *') and by
  pushes to master, release-* branches and v* tags. It has two jobs:
  set-properties exposes the output of the setup-default-test-properties
  action, and python_ml runs one matrix entry per value of
  PythonTestProperties.ALL_SUPPORTED_VERSIONS.
* PYTHON_VERSION is matrix.version with the dots stripped by sed; it
  selects the py<version> Gradle project in the task path.
* postCommitITByTestDirectory appends the value of -PtestDir to the
  pytest options; testDir is '' when the property is not set.
* The new workflow file ends without a trailing newline (see the
  "No newline at end of file" marker below).
* NOTE(review): the CI.md row added here lists "-" under "Requires GCP
  Credentials", while 4/7 shows "Yes" and 7/7 passes a service-account
  secret. Confirm the final table.
* NOTE(review): line breaks and leading indentation in this series were
  restored from a whitespace-collapsed dump. Interior runs of spaces
  (license header, Markdown table padding) are kept as found there, and
  the From: headers of patches 1-3 and 5-7 carry no e-mail address.
  Verify against the repository before applying.

 .../workflows/job-postcommit-python-ml.yml    | 68 +++++++++++++++++++
 CI.md                                         |  8 +++
 .../python/test-suites/dataflow/common.gradle | 25 +++++++
 3 files changed, 101 insertions(+)
 create mode 100644 .github/workflows/job-postcommit-python-ml.yml

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
new file mode 100644
index 000000000000..ab7a918f02fd
--- /dev/null
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This workflow will run the tests of python ML with DataflowRunner.
+
+name: PostCommit Python ML
+
+on:
+  schedule:
+    - cron: '0 */6 * * *'
+  push:
+    branches: ['master', 'release-*']
+    tags: ['v*']
+permissions: read-all
+
+jobs:
+  set-properties:
+    runs-on: self-hosted
+    outputs:
+      properties: ${{ steps.test-properties.outputs.properties }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          submodules: recursive
+      - id: test-properties
+        uses: ./.github/actions/setup-default-test-properties
+
+  python_ml:
+    needs: set-properties
+    name: Python ML
+    runs-on: self-hosted
+    strategy:
+      fail-fast: true
+      matrix:
+        version: ${{fromJson(needs.set-properties.outputs.properties).PythonTestProperties.ALL_SUPPORTED_VERSIONS}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          submodules: recursive
+      - name: Set python version
+        run: echo "PYTHON_VERSION=$(echo ${{ matrix.version }} | sed -e 's/\.//g')" >> $GITHUB_ENV
+      - name: Setup environment
+        uses: ./.github/actions/setup-self-hosted-action
+        with:
+          requires-go-18: false
+      - name: Run :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory
+          arguments: "-PtestDir=apache_beam/ml"
\ No newline at end of file
diff --git a/CI.md b/CI.md
index ed75a4dd4433..8fab4433d6c2 100644
--- a/CI.md
+++ b/CI.md
@@ -125,6 +125,14 @@ Service Account shall have following permissions ([IAM roles](https://cloud.goog
 | Java Wordcount Direct Runner | Runs Java WordCount example with Direct Runner. | Yes | Yes | Yes | - |
 | Java Wordcount Dataflow | Runs Java WordCount example with DataFlow Runner. | - | Yes | Yes | Yes |
 
+### PostCommit Workflows
+
+#### PostCommit Python ML - [job-postcommit-python-ml.yml](.github/workflows/job-postcommit-python-ml.yml)
+| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials |
+|-----------|-----------------------------------------|------------------|-----------------------|---------------|--------------------------|
+| Python ML | Runs Python Python ML Integration Tests | - | Yes | Yes | - |
+
+
 ### GitHub Action Tips
 
 * If you introduce changes to the workflow it is possible that your changes will not be present in the check run triggered in Pull Request.
diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle
index 70284d42d35e..5391dff10e1d 100644
--- a/sdks/python/test-suites/dataflow/common.gradle
+++ b/sdks/python/test-suites/dataflow/common.gradle
@@ -101,6 +101,31 @@ task preCommitIT_V2{
   dependsOn preCommitIT_streaming_V2
 }
 
+task postCommitITByTestDirectory {
+  def testDir = project.hasProperty('testDir') ? project.testDir : ''
+  dependsOn 'installGcpTest'
+  dependsOn ':sdks:python:sdist'
+  dependsOn ':runners:google-cloud-dataflow-java:worker:shadowJar'
+
+  def dataflowWorkerJar = project(":runners:google-cloud-dataflow-java:worker").shadowJar.archivePath
+
+  doLast {
+    def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
+    def argMap = [
+        "test_opts": testOpts + [testDir],
+        "sdk_location": files(configurations.distTarBall.files).singleFile,
+        "worker_jar": dataflowWorkerJar,
+        "suite": "postCommitIT-df${pythonVersionSuffix}",
+        "collect": "it_postcommit"
+    ]
+    def cmdArgs = mapToArgString(argMap)
+    exec {
+      executable 'sh'
+      args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+    }
+  }
+}
+
 task postCommitIT {
   dependsOn 'installGcpTest'
   dependsOn ':sdks:python:sdist'

From bba8de5e1998e72f54278df581709b56fd1df0b4 Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Fri, 23 Sep 2022 13:13:30 -0500
Subject: [PATCH 2/7] Change from self-hosted to [self-hosted, ubuntu-20.04]

---
Review notes:
* Only the python_ml job's runs-on changes in this patch; the
  set-properties job is not touched by this hunk.

 .github/workflows/job-postcommit-python-ml.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
index ab7a918f02fd..9954593d0d1b 100644
--- a/.github/workflows/job-postcommit-python-ml.yml
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -44,7 +44,7 @@ jobs:
   python_ml:
     needs: set-properties
     name: Python ML
-    runs-on: self-hosted
+    runs-on: [self-hosted, ubuntu-20.04]
     strategy:
       fail-fast: true
       matrix:

From 3a5e6bee2f514dc1b21fe28070132eb4f5f54c39 Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Thu, 29 Sep 2022 16:39:02 -0500
Subject: [PATCH 3/7] Remove submodules: recursive and set fail-fast: false

---
Review notes:
* Drops 'submodules: recursive' from both checkout steps and sets
  fail-fast to false for the version matrix.
* NOTE(review): the pre-image blob here (7b00b9880e99) is not the
  post-image of 2/7 (9954593d0d1b), and the hunk offsets are one line
  later than 2/7 implies. Presumably an intermediate change is missing
  from this series. Verify before applying in sequence.

 .github/workflows/job-postcommit-python-ml.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
index 7b00b9880e99..6665c605bb77 100644
--- a/.github/workflows/job-postcommit-python-ml.yml
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -38,7 +38,6 @@ jobs:
         uses: actions/checkout@v3
         with:
           persist-credentials: false
-          submodules: recursive
       - id: test-properties
         uses: ./.github/actions/setup-default-test-properties
 
@@ -47,7 +46,7 @@ jobs:
     name: Python ML
     runs-on: [self-hosted, ubuntu-20.04]
     strategy:
-      fail-fast: true
+      fail-fast: false
       matrix:
         version: ${{fromJson(needs.set-properties.outputs.properties).PythonTestProperties.ALL_SUPPORTED_VERSIONS}}
     steps:
@@ -55,7 +54,6 @@ jobs:
         uses: actions/checkout@v3
         with:
           persist-credentials: false
-          submodules: recursive
       - name: Set python version
         run: echo "PYTHON_VERSION=$(echo ${{ matrix.version }} | sed -e 's/\.//g')" >> $GITHUB_ENV
       - name: Setup environment

From 3d22da0e5e7416a724df4dac363114b5589bf6ec Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez <74670721+benWize@users.noreply.github.com>
Date: Wed, 5 Oct 2022 19:44:36 -0500
Subject: [PATCH 4/7] Update CI.md

Co-authored-by: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
---
Review notes:
* Removes the duplicated word in "Runs Python Python ML Integration
  Tests".
* NOTE(review): the CI.md pre-image here (7caac98ac6e4) differs from the
  post-image of 1/7 (8fab4433d6c2), and the table layout in the context
  lines is not the one 1/7 added. Presumably CI.md was reworked by a
  change that is not part of this series.

 CI.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CI.md b/CI.md
index 7caac98ac6e4..aebbad8b0d07 100644
--- a/CI.md
+++ b/CI.md
@@ -140,7 +140,7 @@ Service Account shall have following permissions ([IAM roles](https://cloud.goog
 ### PostCommit Workflows
 | Workflow | Description | Requires GCP Credentials |
 |--------------------------------------------------------------------------------|-----------------------------------------|--------------------------|
-| [job-postcommit-python-ml.yml](.github/workflows/job-postcommit-python-ml.yml) | Runs Python Python ML Integration Tests | Yes |
+| [job-postcommit-python-ml.yml](.github/workflows/job-postcommit-python-ml.yml) | Runs Python ML Integration Tests | Yes |
 
 
 ### GitHub Action Tips

From e418fce16cdfd065aa5c2ad326ff694186ef8186 Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Thu, 6 Oct 2022 13:26:08 -0500
Subject: [PATCH 5/7] Add torchInferenceTest to ML workflow

---
Review notes:
* Appends a second Gradle step (direct-runner torchInferenceTest) after
  the Dataflow postCommitITByTestDirectory step.
* The 'arguments' line gains a newline only because a step follows it;
  the file still ends without a trailing newline.

 .github/workflows/job-postcommit-python-ml.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
index 6665c605bb77..bee94f1fe8bd 100644
--- a/.github/workflows/job-postcommit-python-ml.yml
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -64,4 +64,8 @@ jobs:
         uses: ./.github/actions/gradle-command-self-hosted-action
         with:
           gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory
-          arguments: "-PtestDir=apache_beam/ml"
\ No newline at end of file
+          arguments: "-PtestDir=apache_beam/ml"
+      - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest
\ No newline at end of file

From 0bc2df202c2ea379d39733646b3e0b16c5c06743 Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Fri, 14 Oct 2022 14:37:51 -0500
Subject: [PATCH 6/7] Change torchinferenceTest to inferencePostCommitIT to include sklearnInferenceTest

---
Review notes:
* Renames the step and its gradle-command from torchInferenceTest to
  inferencePostCommitIT; no other line changes.
* The file still ends without a trailing newline after this patch.

 .github/workflows/job-postcommit-python-ml.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
index bee94f1fe8bd..8ab077292934 100644
--- a/.github/workflows/job-postcommit-python-ml.yml
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -65,7 +65,7 @@ jobs:
         with:
           gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory
           arguments: "-PtestDir=apache_beam/ml"
-      - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest
+      - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT
         uses: ./.github/actions/gradle-command-self-hosted-action
         with:
-          gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest
\ No newline at end of file
+          gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT
\ No newline at end of file

From b33b230a65f9ae1cbc9cffd44f882491e64b9c7e Mon Sep 17 00:00:00 2001
From: Benjamin Gonzalez
Date: Fri, 9 Dec 2022 17:02:39 -0600
Subject: [PATCH 7/7] Setup service account with action

---
Review notes:
* Inserts a step, directly after checkout in the python_ml job, that
  calls the local activate-service-account action with the
  GCP_SELF_HOSTED_SA_KEY secret as its GCP_SERVICE_ACCOUNT input.

 .github/workflows/job-postcommit-python-ml.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml
index 8ab077292934..46bda3d36535 100644
--- a/.github/workflows/job-postcommit-python-ml.yml
+++ b/.github/workflows/job-postcommit-python-ml.yml
@@ -54,6 +54,10 @@ jobs:
         uses: actions/checkout@v3
         with:
           persist-credentials: false
+      - name: Setup service account
+        uses: ./.github/actions/activate-service-account
+        with:
+          GCP_SERVICE_ACCOUNT: ${{secrets.GCP_SELF_HOSTED_SA_KEY}}
       - name: Set python version
         run: echo "PYTHON_VERSION=$(echo ${{ matrix.version }} | sed -e 's/\.//g')" >> $GITHUB_ENV
       - name: Setup environment