From 9bd7aa0d3e40a6edde76c1b20ced683e1c521885 Mon Sep 17 00:00:00 2001 From: Benjamin Gonzalez Date: Tue, 20 Sep 2022 13:24:45 -0500 Subject: [PATCH 1/6] Migrate and sharding Python Postcommit ML from Jenkins to GA --- .../workflows/job-postcommit-python-ml.yml | 68 +++++++++++++++++++ CI.md | 8 +++ .../python/test-suites/dataflow/common.gradle | 25 +++++++ 3 files changed, 101 insertions(+) create mode 100644 .github/workflows/job-postcommit-python-ml.yml diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml new file mode 100644 index 000000000000..ab7a918f02fd --- /dev/null +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This workflow will run the tests of python ML with DataflowRunner. 
+ +name: PostCommit Python ML + +on: + schedule: + - cron: '0 */6 * * *' + push: + branches: ['master', 'release-*'] + tags: ['v*'] +permissions: read-all + +jobs: + set-properties: + runs-on: self-hosted + outputs: + properties: ${{ steps.test-properties.outputs.properties }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + persist-credentials: false + submodules: recursive + - id: test-properties + uses: ./.github/actions/setup-default-test-properties + + python_ml: + needs: set-properties + name: Python ML + runs-on: self-hosted + strategy: + fail-fast: true + matrix: + version: ${{fromJson(needs.set-properties.outputs.properties).PythonTestProperties.ALL_SUPPORTED_VERSIONS}} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + persist-credentials: false + submodules: recursive + - name: Set python version + run: echo "PYTHON_VERSION=$(echo ${{ matrix.version }} | sed -e 's/\.//g')" >> $GITHUB_ENV + - name: Setup environment + uses: ./.github/actions/setup-self-hosted-action + with: + requires-go-18: false + - name: Run :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory + arguments: "-PtestDir=apache_beam/ml" \ No newline at end of file diff --git a/CI.md b/CI.md index ed75a4dd4433..8fab4433d6c2 100644 --- a/CI.md +++ b/CI.md @@ -125,6 +125,14 @@ Service Account shall have following permissions ([IAM roles](https://cloud.goog | Java Wordcount Direct Runner | Runs Java WordCount example with Direct Runner. | Yes | Yes | Yes | - | | Java Wordcount Dataflow | Runs Java WordCount example with DataFlow Runner. 
| - | Yes | Yes | Yes | +### PostCommit Workflows + +#### PostCommit Python ML - [job-postcommit-python-ml.yml](.github/workflows/job-postcommit-python-ml.yml) +| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials | +|-----------|-----------------------------------------|------------------|-----------------------|---------------|--------------------------| +| Python ML | Runs Python ML Integration Tests | - | Yes | Yes | - | + + ### GitHub Action Tips * If you introduce changes to the workflow it is possible that your changes will not be present in the check run triggered in Pull Request. diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 70284d42d35e..5391dff10e1d 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -101,6 +101,31 @@ task preCommitIT_V2{ dependsOn preCommitIT_streaming_V2 } +task postCommitITByTestDirectory { + def testDir = project.hasProperty('testDir') ? project.testDir : '' + dependsOn 'installGcpTest' + dependsOn ':sdks:python:sdist' + dependsOn ':runners:google-cloud-dataflow-java:worker:shadowJar' + + def dataflowWorkerJar = project(":runners:google-cloud-dataflow-java:worker").shadowJar.archivePath + + doLast { + def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"] + def argMap = [ + "test_opts": testOpts + [testDir], + "sdk_location": files(configurations.distTarBall.files).singleFile, + "worker_jar": dataflowWorkerJar, + "suite": "postCommitIT-df${pythonVersionSuffix}", + "collect": "it_postcommit" + ] + def cmdArgs = mapToArgString(argMap) + exec { + executable 'sh' + args '-c', ". 
${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs" + } + } +} + task postCommitIT { dependsOn 'installGcpTest' dependsOn ':sdks:python:sdist' From bba8de5e1998e72f54278df581709b56fd1df0b4 Mon Sep 17 00:00:00 2001 From: Benjamin Gonzalez Date: Fri, 23 Sep 2022 13:13:30 -0500 Subject: [PATCH 2/6] Change from self-hosted to [self-hosted, ubuntu-20.04] --- .github/workflows/job-postcommit-python-ml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml index ab7a918f02fd..9954593d0d1b 100644 --- a/.github/workflows/job-postcommit-python-ml.yml +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -44,7 +44,7 @@ jobs: python_ml: needs: set-properties name: Python ML - runs-on: self-hosted + runs-on: [self-hosted, ubuntu-20.04] strategy: fail-fast: true matrix: From 3c5a85d1fc534b2d763f38b2bba53df54ed02254 Mon Sep 17 00:00:00 2001 From: Benjamin Gonzalez Date: Thu, 6 Oct 2022 12:16:23 -0500 Subject: [PATCH 3/6] Test run inferencePostCommit --- .github/workflows/job-postcommit-python-ml.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml index 7b00b9880e99..e2e20cb7d01a 100644 --- a/.github/workflows/job-postcommit-python-ml.yml +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -66,4 +66,8 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory - arguments: "-PtestDir=apache_beam/ml" \ No newline at end of file + arguments: "-PtestDir=apache_beam/ml" + - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: 
:sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT \ No newline at end of file From 1f10467445cbe8f8a58033efe84e9a064b52a10e Mon Sep 17 00:00:00 2001 From: Benjamin Gonzalez Date: Thu, 6 Oct 2022 12:18:10 -0500 Subject: [PATCH 4/6] Test run torchTest --- .github/workflows/job-postcommit-python-ml.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml index e2e20cb7d01a..c1007018fd52 100644 --- a/.github/workflows/job-postcommit-python-ml.yml +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -67,7 +67,7 @@ jobs: with: gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory arguments: "-PtestDir=apache_beam/ml" - - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT + - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT \ No newline at end of file + gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest \ No newline at end of file From a60ca27f129765ea7efbb10ae994e25ee7f5ba89 Mon Sep 17 00:00:00 2001 From: Benjamin Gonzalez Date: Thu, 6 Oct 2022 12:18:42 -0500 Subject: [PATCH 5/6] Testing --- .github/workflows/job-postcommit-python-ml.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml index c1007018fd52..be39813962b7 100644 --- a/.github/workflows/job-postcommit-python-ml.yml +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -26,6 +26,7 @@ on: push: branches: ['master', 'release-*'] tags: ['v*'] + pull_request: permissions: read-all jobs: From 2f6ab7c3631846ddd6496002dd9d69fce518a3d9 Mon Sep 17 
00:00:00 2001 From: Benjamin Gonzalez Date: Fri, 14 Oct 2022 14:39:51 -0500 Subject: [PATCH 6/6] Change torchinferenceTest to inferencePostCommitIT to include sklearnInferenceTest --- .github/workflows/job-postcommit-python-ml.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/job-postcommit-python-ml.yml b/.github/workflows/job-postcommit-python-ml.yml index be39813962b7..e0d7867cc56f 100644 --- a/.github/workflows/job-postcommit-python-ml.yml +++ b/.github/workflows/job-postcommit-python-ml.yml @@ -68,7 +68,7 @@ jobs: with: gradle-command: :sdks:python:test-suites:dataflow:py${{env.PYTHON_VERSION}}:postCommitITByTestDirectory arguments: "-PtestDir=apache_beam/ml" - - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest + - name: Run :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:torchInferenceTest \ No newline at end of file + gradle-command: :sdks:python:test-suites:direct:py${{env.PYTHON_VERSION}}:inferencePostCommitIT \ No newline at end of file