From bfb0bbea996634d0b0805ee5df8fad4bf0ec539e Mon Sep 17 00:00:00 2001 From: Mark Liu Date: Fri, 26 Apr 2019 11:23:16 -0700 Subject: [PATCH 1/4] Refactor Python performance test groovy file for easy configuration. --- .../job_PerformanceTests_Python.groovy | 143 +++++++++++++----- .../job_Performancetests_Python35.groovy | 63 -------- 2 files changed, 101 insertions(+), 105 deletions(-) delete mode 100644 .test-infra/jenkins/job_Performancetests_Python35.groovy diff --git a/.test-infra/jenkins/job_PerformanceTests_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_Python.groovy index 5354d464bd8d..84fbf1f7f58d 100644 --- a/.test-infra/jenkins/job_PerformanceTests_Python.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_Python.groovy @@ -18,46 +18,105 @@ import CommonJobProperties as commonJobProperties -// This job runs the Beam Python performance tests on PerfKit Benchmarker. -job('beam_PerformanceTests_Python'){ - // Set default Beam job properties. - commonJobProperties.setTopLevelMainJobProperties(delegate) - - // Run job in postcommit every 6 hours, don't trigger every push. - commonJobProperties.setAutoJob( - delegate, - 'H */6 * * *') - - // Allows triggering this build against pull requests. 
- commonJobProperties.enablePhraseTriggeringFromPullRequest( - delegate, - 'Python SDK Performance Test', - 'Run Python Performance Test') - - def pipelineArgs = [ - project: 'apache-beam-testing', - staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', - temp_location: 'gs://temp-storage-for-end-to-end-tests/temp-it', - output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output' - ] - def pipelineArgList = [] - pipelineArgs.each({ - key, value -> pipelineArgList.add("--$key=$value") - }) - def pipelineArgsJoined = pipelineArgList.join(',') - - def argMap = [ - beam_sdk : 'python', - benchmarks : 'beam_integration_benchmark', - bigquery_table : 'beam_performance.wordcount_py_pkb_results', - beam_it_class : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', - beam_it_module : 'sdks/python', - beam_prebuilt : 'true', // skip beam prebuild - beam_python_sdk_location : 'build/apache-beam.tar.gz', - beam_runner : 'TestDataflowRunner', - beam_it_timeout : '1200', - beam_it_args : pipelineArgsJoined, - ] - - commonJobProperties.buildPerformanceTest(delegate, argMap) + +class PerformanceTestConfigurations { + String jobName + String jobDescription + String jobTriggerPhrase + String buildSchedule = 'H */6 * * *' // every 6 hours + String benchmarkName = 'beam_integration_benchmark' + String sdk = 'python' + String bigqueryTable + String itClass + String itModule + Boolean skipPrebuild = false + String pythonSdkLocation + String runner = 'TestDataflowRunner' + Integer itTimeout = 1200 + Map extraPipelineArgs +} + + +def testConfigurations = [ + new PerformanceTestConfigurations( + jobName : 'beam_PerformanceTests_Python', + jobDescription : 'Python SDK Performance Test', + jobTriggerPhrase : 'Run Python Performance Test', + bigqueryTable : 'beam_performance.wordcount_py_pkb_results', + skipPrebuild : true, + pythonSdkLocation : 'build/apache-beam.tar.gz', + itClass : 
'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', + itModule : 'sdks/python', + extraPipelineArgs : [ + project : 'apache-beam-testing', + staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', + temp_location : 'gs://temp-storage-for-end-to-end-tests/temp-it', + output : 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output', + ], + ), + new PerformanceTestConfigurations( + jobName : 'beam_PerformanceTests_Python35', + jobDescription : 'Python35 SDK Performance Test', + jobTriggerPhrase : 'Run Python35 Performance Test', + bigqueryTable : 'beam_performance.wordcount_py35_pkb_results', + skipPrebuild : true, + pythonSdkLocation : 'test-suites/dataflow/py35/build/apache-beam.tar.gz', + itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', + itModule : 'sdks/python/test-suites/dataflow/py35', + extraPipelineArgs : [ + project : 'apache-beam-testing', + staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', + temp_location : 'gs://temp-storage-for-end-to-end-tests/temp-it', + output : 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output', + ], + ) +] + + +for (testConfig in testConfigurations) { + createPythonPerformanceTestJob(testConfig) +} + + +private void createPythonPerformanceTestJob(PerformanceTestConfigurations testConfig) { + // This job runs the Beam Python performance tests on PerfKit Benchmarker. + job(testConfig.jobName) { + // Set default Beam job properties. + commonJobProperties.setTopLevelMainJobProperties(delegate) + + // Run job in postcommit, don't trigger every push. + commonJobProperties.setAutoJob( + delegate, + testConfig.buildSchedule) + + // Allows triggering this build against pull requests. + commonJobProperties.enablePhraseTriggeringFromPullRequest( + delegate, + testConfig.jobDescription, + testConfig.jobTriggerPhrase) + + // Helper function to join pipeline args from a map. 
+ def joinPipelineArgs = { pipelineArgs -> + def pipelineArgList = [] + pipelineArgs.each({ + key, value -> pipelineArgList.add("--$key=$value") + }) + return pipelineArgList.join(',') + } + + def argMap = [ + beam_sdk : testConfig.sdk, + benchmarks : testConfig.benchmarkName, + bigquery_table : testConfig.bigqueryTable, + beam_it_class : testConfig.itClass, + beam_it_module : testConfig.itModule, + beam_prebuilt : testConfig.skipPrebuild.toString(), + beam_python_sdk_location: testConfig.pythonSdkLocation, + beam_runner : testConfig.runner, + beam_it_timeout : testConfig.itTimeout.toString(), + beam_it_args : joinPipelineArgs(testConfig.extraPipelineArgs), + ] + + commonJobProperties.buildPerformanceTest(delegate, argMap) + } } diff --git a/.test-infra/jenkins/job_Performancetests_Python35.groovy b/.test-infra/jenkins/job_Performancetests_Python35.groovy deleted file mode 100644 index 41e0fcc14ffb..000000000000 --- a/.test-infra/jenkins/job_Performancetests_Python35.groovy +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import CommonJobProperties as commonJobProperties - -// This job runs the Beam Python35 performance benchmark on PerfKit Benchmarker. 
-job('beam_PerformanceTests_Python35'){ - // Set default Beam job properties. - commonJobProperties.setTopLevelMainJobProperties(delegate) - - // Run job in postcommit every 6 hours, don't trigger every push. - commonJobProperties.setAutoJob( - delegate, - 'H */6 * * *') - - // Allows triggering this build against pull requests. - commonJobProperties.enablePhraseTriggeringFromPullRequest( - delegate, - 'Python35 SDK Performance Test', - 'Run Python35 Performance Test') - - def pipelineArgs = [ - project: 'apache-beam-testing', - staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', - temp_location: 'gs://temp-storage-for-end-to-end-tests/temp-it', - output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output' - ] - def pipelineArgList = [] - pipelineArgs.each({ - key, value -> pipelineArgList.add("--$key=$value") - }) - def pipelineArgsJoined = pipelineArgList.join(',') - - def argMap = [ - beam_sdk : 'python', - benchmarks : 'beam_integration_benchmark', - bigquery_table : 'beam_performance.wordcount_py35_pkb_results', - beam_it_class : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', - beam_it_module : 'sdks/python/test-suites/dataflow/py35', - beam_prebuilt : 'true', // skip beam prebuild - beam_python_sdk_location : 'test-suites/dataflow/py35/build/apache-beam.tar.gz', - beam_runner : 'TestDataflowRunner', - beam_it_timeout : '1200', - beam_it_args : pipelineArgsJoined, - ] - - commonJobProperties.buildPerformanceTest(delegate, argMap) -} From 8cde119ad556bf3670add7fa3a0f7ecf28f674f4 Mon Sep 17 00:00:00 2001 From: Mark Liu Date: Tue, 7 May 2019 17:03:49 -0700 Subject: [PATCH 2/4] fixup! 
Group dataflowPipelineArgs for reuse --- .../job_PerformanceTests_Python.groovy | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.test-infra/jenkins/job_PerformanceTests_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_Python.groovy index 84fbf1f7f58d..124dfd96029f 100644 --- a/.test-infra/jenkins/job_PerformanceTests_Python.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_Python.groovy @@ -36,7 +36,15 @@ class PerformanceTestConfigurations { Map extraPipelineArgs } +// Common pipeline args for Dataflow job. +def dataflowPipelineArgs = [ + project : 'apache-beam-testing', + staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', + temp_location : 'gs://temp-storage-for-end-to-end-tests/temp-it', +] + +// Configurations of each Jenkins job. def testConfigurations = [ new PerformanceTestConfigurations( jobName : 'beam_PerformanceTests_Python', @@ -47,11 +55,8 @@ def testConfigurations = [ pythonSdkLocation : 'build/apache-beam.tar.gz', itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python', - extraPipelineArgs : [ - project : 'apache-beam-testing', - staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', - temp_location : 'gs://temp-storage-for-end-to-end-tests/temp-it', - output : 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output', + extraPipelineArgs : dataflowPipelineArgs + [ + output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output' ], ), new PerformanceTestConfigurations( @@ -63,11 +68,8 @@ def testConfigurations = [ pythonSdkLocation : 'test-suites/dataflow/py35/build/apache-beam.tar.gz', itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python/test-suites/dataflow/py35', - extraPipelineArgs : [ - project : 'apache-beam-testing', - staging_location: 'gs://temp-storage-for-end-to-end-tests/staging-it', - temp_location : 
'gs://temp-storage-for-end-to-end-tests/temp-it', - output : 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output', + extraPipelineArgs : dataflowPipelineArgs + [ + output: 'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output' ], ) ] From 116ba2525ecf0f889fb832c30352e748a62e5b37 Mon Sep 17 00:00:00 2001 From: Mark Liu Date: Wed, 8 May 2019 13:24:53 -0700 Subject: [PATCH 3/4] fixit! Address comments and renaming parameters --- .../job_PerformanceTests_Python.groovy | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/.test-infra/jenkins/job_PerformanceTests_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_Python.groovy index 124dfd96029f..f88124ecc886 100644 --- a/.test-infra/jenkins/job_PerformanceTests_Python.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_Python.groovy @@ -20,19 +20,33 @@ import CommonJobProperties as commonJobProperties class PerformanceTestConfigurations { + // Name of the Jenkins job String jobName + // Description of the Jenkins job String jobDescription + // Phrase to trigger this Jenkins job String jobTriggerPhrase - String buildSchedule = 'H */6 * * *' // every 6 hours + // Frequency of the job build, default to every 6 hours + String buildSchedule = 'H */6 * * *' + // A benchmark flag, will pass to "--benchmarkName" String benchmarkName = 'beam_integration_benchmark' + // A benchmark flag, will pass to "--beam_sdk" String sdk = 'python' - String bigqueryTable + // A benchmark flag, will pass to "--bigqueryTable" + String resultTable + // A benchmark flag, will pass to "--beam_it_class" String itClass + // A benchmark flag, will pass to "--beam_it_module" String itModule - Boolean skipPrebuild = false - String pythonSdkLocation + // A benchmark flag, will pass to "--beam_prebuilt" + Boolean prebuilt = false + // A benchmark flag, will pass to "--beam_python_sdk_location" + String pythonSdkLocation = '' + // A benchmark flag, will pass to "--beam_runner" String runner = 
'TestDataflowRunner' - Integer itTimeout = 1200 + // A benchmark flag, will pass to "--beam_it_timeout" + Integer itTimeoutSec = 1200 + // A benchmark flag, will pass to "--beam_it_args" Map extraPipelineArgs } @@ -50,9 +64,8 @@ def testConfigurations = [ jobName : 'beam_PerformanceTests_Python', jobDescription : 'Python SDK Performance Test', jobTriggerPhrase : 'Run Python Performance Test', - bigqueryTable : 'beam_performance.wordcount_py_pkb_results', - skipPrebuild : true, - pythonSdkLocation : 'build/apache-beam.tar.gz', + resultTable : 'beam_performance.wordcount_py_pkb_results', + prebuilt : true, itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python', extraPipelineArgs : dataflowPipelineArgs + [ @@ -63,9 +76,8 @@ def testConfigurations = [ jobName : 'beam_PerformanceTests_Python35', jobDescription : 'Python35 SDK Performance Test', jobTriggerPhrase : 'Run Python35 Performance Test', - bigqueryTable : 'beam_performance.wordcount_py35_pkb_results', - skipPrebuild : true, - pythonSdkLocation : 'test-suites/dataflow/py35/build/apache-beam.tar.gz', + resultTable : 'beam_performance.wordcount_py35_pkb_results', + prebuilt : true, itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python/test-suites/dataflow/py35', extraPipelineArgs : dataflowPipelineArgs + [ @@ -97,28 +109,39 @@ private void createPythonPerformanceTestJob(PerformanceTestConfigurations testCo testConfig.jobDescription, testConfig.jobTriggerPhrase) - // Helper function to join pipeline args from a map. 
- def joinPipelineArgs = { pipelineArgs -> - def pipelineArgList = [] - pipelineArgs.each({ - key, value -> pipelineArgList.add("--$key=$value") - }) - return pipelineArgList.join(',') - } - def argMap = [ beam_sdk : testConfig.sdk, benchmarks : testConfig.benchmarkName, - bigquery_table : testConfig.bigqueryTable, + bigquery_table : testConfig.resultTable, beam_it_class : testConfig.itClass, beam_it_module : testConfig.itModule, - beam_prebuilt : testConfig.skipPrebuild.toString(), - beam_python_sdk_location: testConfig.pythonSdkLocation, + beam_prebuilt : testConfig.prebuilt.toString(), + beam_python_sdk_location: getSDKLocationFromModule(testConfig.pythonSdkLocation, + testConfig.itModule), beam_runner : testConfig.runner, - beam_it_timeout : testConfig.itTimeout.toString(), + beam_it_timeout : testConfig.itTimeoutSec.toString(), beam_it_args : joinPipelineArgs(testConfig.extraPipelineArgs), ] commonJobProperties.buildPerformanceTest(delegate, argMap) } } + + +// Helper function to join pipeline args from a map. +private static String joinPipelineArgs(Map pipelineArgs) { + def pipelineArgList = [] + pipelineArgs.each({ + key, value -> pipelineArgList.add("--$key=$value") + }) + return pipelineArgList.join(',') +} + + +// Get relative path of sdk location based on itModule if the location is not provided. +private static String getSDKLocationFromModule(String pythonSDKLocation, String itModule) { + if (!pythonSDKLocation && itModule.startsWith("sdks/python")) { + return (itModule.substring("sdks/python".length()) + "/build/apache-beam.tar.gz").substring(1) + } + return pythonSDKLocation +} From f16c2fcbf41e265f3a3fb4772337f199f66efe94 Mon Sep 17 00:00:00 2001 From: Mark Liu Date: Fri, 10 May 2019 14:47:15 -0700 Subject: [PATCH 4/4] fixup! 
address comments --- .../job_PerformanceTests_Python.groovy | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/.test-infra/jenkins/job_PerformanceTests_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_Python.groovy index f88124ecc886..732272fd7d5c 100644 --- a/.test-infra/jenkins/job_PerformanceTests_Python.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_Python.groovy @@ -28,25 +28,25 @@ class PerformanceTestConfigurations { String jobTriggerPhrase // Frequency of the job build, default to every 6 hours String buildSchedule = 'H */6 * * *' - // A benchmark flag, will pass to "--benchmarkName" + // A benchmark defined flag, will pass to benchmark as "--benchmarks" String benchmarkName = 'beam_integration_benchmark' - // A benchmark flag, will pass to "--beam_sdk" - String sdk = 'python' - // A benchmark flag, will pass to "--bigqueryTable" + // A benchmark defined flag, will pass to benchmark as "--bigquery_table" String resultTable - // A benchmark flag, will pass to "--beam_it_class" + // A benchmark defined flag, will pass to benchmark as "--beam_it_class" String itClass - // A benchmark flag, will pass to "--beam_it_module" + // A benchmark defined flag, will pass to benchmark as "--beam_it_module". + // It's a Gradle project that defines 'integrationTest' task. This task is executed by Perfkit + // Beam benchmark launcher and can be added by enablePythonPerformanceTest() defined in + // BeamModulePlugin. String itModule - // A benchmark flag, will pass to "--beam_prebuilt" - Boolean prebuilt = false - // A benchmark flag, will pass to "--beam_python_sdk_location" + // A benchmark defined flag, will pass to benchmark as "--beam_python_sdk_location". + // It's the location of Python SDK distribution archive which is required for TestDataflowRunner. 
String pythonSdkLocation = '' - // A benchmark flag, will pass to "--beam_runner" + // A benchmark defined flag, will pass to benchmark as "--beam_runner" String runner = 'TestDataflowRunner' - // A benchmark flag, will pass to "--beam_it_timeout" + // A benchmark defined flag, will pass to benchmark as "--beam_it_timeout" Integer itTimeoutSec = 1200 - // A benchmark flag, will pass to "--beam_it_args" + // A benchmark defined flag, will pass to benchmark as "--beam_it_args" Map extraPipelineArgs } @@ -61,11 +61,10 @@ def dataflowPipelineArgs = [ // Configurations of each Jenkins job. def testConfigurations = [ new PerformanceTestConfigurations( - jobName : 'beam_PerformanceTests_Python', - jobDescription : 'Python SDK Performance Test', - jobTriggerPhrase : 'Run Python Performance Test', - resultTable : 'beam_performance.wordcount_py_pkb_results', - prebuilt : true, + jobName : 'beam_PerformanceTests_WordCountIT_Py27', + jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py27', + jobTriggerPhrase : 'Run Python27 WordCountIT Performance Test', + resultTable : 'beam_performance.wordcount_py27_pkb_results', itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python', extraPipelineArgs : dataflowPipelineArgs + [ @@ -73,11 +72,10 @@ def testConfigurations = [ ], ), new PerformanceTestConfigurations( - jobName : 'beam_PerformanceTests_Python35', - jobDescription : 'Python35 SDK Performance Test', - jobTriggerPhrase : 'Run Python35 Performance Test', + jobName : 'beam_PerformanceTests_WordCountIT_Py35', + jobDescription : 'Python SDK Performance Test - Run WordCountIT in Py35', + jobTriggerPhrase : 'Run Python35 WordCountIT Performance Test', resultTable : 'beam_performance.wordcount_py35_pkb_results', - prebuilt : true, itClass : 'apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it', itModule : 'sdks/python/test-suites/dataflow/py35', extraPipelineArgs : dataflowPipelineArgs + [ @@ -110,12 
+108,12 @@ private void createPythonPerformanceTestJob(PerformanceTestConfigurations testCo testConfig.jobTriggerPhrase) def argMap = [ - beam_sdk : testConfig.sdk, + beam_sdk : 'python', benchmarks : testConfig.benchmarkName, bigquery_table : testConfig.resultTable, beam_it_class : testConfig.itClass, beam_it_module : testConfig.itModule, - beam_prebuilt : testConfig.prebuilt.toString(), + beam_prebuilt : 'true', // Python benchmarks don't need to prebuild the repo before running beam_python_sdk_location: getSDKLocationFromModule(testConfig.pythonSdkLocation, testConfig.itModule), beam_runner : testConfig.runner,