From 75529f95425e729aa15a768274377d5a892737c5 Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Aug 2021 16:38:05 -0700 Subject: [PATCH 1/5] [GoSDK Infra] Limit simultaneous test binaries to 3. Executing all test binaries at the same time seems to be overloading the mini-local Flink runner, setting a limit to avoid overwhelming it. --- sdks/go/test/run_validatesrunner_tests.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 0f404b5d7808..8e422718f26a 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -79,6 +79,9 @@ RUNNER=portable # packages are executed in parallel. TIMEOUT=1h +# Default limit on simultaneous test binaries/packages being executed. +PARALLEL=3 + # Where to store integration test outputs. GCS_LOCATION=gs://temp-storage-for-end-to-end-tests @@ -356,6 +359,11 @@ else CONTAINER=apache/beam_go_sdk fi + +# -p dictates the number of parallel test binaries running tests. +# --parallel indicates within a test binary level of parallism. +ARGS="$ARGS -p $PARALLEL" + # Assemble test arguments and pipeline options. ARGS="$ARGS --timeout=$TIMEOUT" ARGS="$ARGS --runner=$RUNNER" @@ -394,7 +402,7 @@ if [[ "$JENKINS" == true ]]; then echo ">>> For Jenkins environment, changing test targets to: $TESTS" echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS" - GOPATH=$TEMP_GOPATH go test -v $TESTS $ARGS \ + GOPATH=$TEMP_GOPATH go test -v $TESTS $ARGS \ || TEST_EXIT_CODE=$? # don't fail fast here; clean up environment before exiting else echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS" From cd618cb7409e115e24ec0bdaa34a724c4122e9fa Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Aug 2021 17:41:25 -0700 Subject: [PATCH 2/5] Rename -p value to SIMULTANEOUS, make it a flag. 
--- sdks/go/test/run_validatesrunner_tests.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 8e422718f26a..0c4cfb1e7c3b 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -33,6 +33,8 @@ # --tests -> A space-seperated list of targets for "go test". Defaults to # all packages in the integration and regression directories. # --timeout -> Timeout for the go test command, on a per-package level. +# --simultaneous -> Number of simultaneous packages to test. +# Controls the -p flag for the go test command. Defaults to 3. # --endpoint -> An endpoint for an existing job server outside the script. # If present, job server jar flags are ignored. # --test_expansion_jar -> Filepath to jar for an expansion service, for @@ -80,7 +82,7 @@ RUNNER=portable TIMEOUT=1h # Default limit on simultaneous test binaries/packages being executed. -PARALLEL=3 +SIMULTANEOUS=3 # Where to store integration test outputs. GCS_LOCATION=gs://temp-storage-for-end-to-end-tests @@ -123,6 +125,11 @@ case $key in shift # past argument shift # past value ;; + --simultaneous) + SIMULTANEOUS="$2" + shift # past argument + shift # past value + ;; --project) PROJECT="$2" shift # past argument @@ -360,9 +367,9 @@ else fi -# -p dictates the number of parallel test binaries running tests. -# --parallel indicates within a test binary level of parallism. -ARGS="$ARGS -p $PARALLEL" +# The go test flag -p dictates the number of simultaneous test binaries running tests. +# Note that --parallel indicates within a test binary level of parallism. +ARGS="$ARGS -p $SIMULTANEOUS" # Assemble test arguments and pipeline options. 
ARGS="$ARGS --timeout=$TIMEOUT" From 27178b48a9e0f5d1e0924560d6b6a5eba13a6269 Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Aug 2021 17:44:27 -0700 Subject: [PATCH 3/5] Make flink run test binaries one at a time. --- sdks/go/test/run_validatesrunner_tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 0c4cfb1e7c3b..69f1f1c5a8f3 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -232,6 +232,10 @@ print(s.getsockname()[1]) s.close() " +# The go test flag -p dictates the number of simultaneous test binaries running tests. +# Note that --parallel indicates within a test binary level of parallism. +ARGS="-p $SIMULTANEOUS" + # Set up environment based on runner. if [[ "$RUNNER" == "dataflow" ]]; then if [[ -z "$DATAFLOW_WORKER_JAR" ]]; then @@ -265,6 +269,7 @@ elif [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || --job-port $JOB_PORT \ --expansion-port 0 \ --artifact-port 0 & + ARGS="-p 1" elif [[ "$RUNNER" == "samza" ]]; then java \ -jar $SAMZA_JOB_SERVER_JAR \ @@ -366,11 +371,6 @@ else CONTAINER=apache/beam_go_sdk fi - -# The go test flag -p dictates the number of simultaneous test binaries running tests. -# Note that --parallel indicates within a test binary level of parallism. -ARGS="$ARGS -p $SIMULTANEOUS" - # Assemble test arguments and pipeline options. ARGS="$ARGS --timeout=$TIMEOUT" ARGS="$ARGS --runner=$RUNNER" From 647a1cf4d03c57306a22586f6ef1b8520308abbe Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Aug 2021 17:49:28 -0700 Subject: [PATCH 4/5] rm extra space, clarify runner discrepancy. 
--- sdks/go/test/run_validatesrunner_tests.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 69f1f1c5a8f3..b854df0bff37 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -34,7 +34,8 @@ # all packages in the integration and regression directories. # --timeout -> Timeout for the go test command, on a per-package level. # --simultaneous -> Number of simultaneous packages to test. -# Controls the -p flag for the go test command. Defaults to 3. +# Controls the -p flag for the go test command. +# Not used for Flink, Spark, or Samza runners. Defaults to 3 otherwise. # --endpoint -> An endpoint for an existing job server outside the script. # If present, job server jar flags are ignored. # --test_expansion_jar -> Filepath to jar for an expansion service, for @@ -409,7 +410,7 @@ if [[ "$JENKINS" == true ]]; then echo ">>> For Jenkins environment, changing test targets to: $TESTS" echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS" - GOPATH=$TEMP_GOPATH go test -v $TESTS $ARGS \ + GOPATH=$TEMP_GOPATH go test -v $TESTS $ARGS \ || TEST_EXIT_CODE=$? # don't fail fast here; clean up environment before exiting else echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS" From 1c2efdab8371403eb1ff32142a11b35796f30184 Mon Sep 17 00:00:00 2001 From: Robert Burke Date: Wed, 11 Aug 2021 17:52:01 -0700 Subject: [PATCH 5/5] Clean up to always lean on SIMULTANEOUS variable. 
--- sdks/go/test/run_validatesrunner_tests.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index b854df0bff37..a73a1604e5c8 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -233,10 +233,6 @@ print(s.getsockname()[1]) s.close() " -# The go test flag -p dictates the number of simultaneous test binaries running tests. -# Note that --parallel indicates within a test binary level of parallism. -ARGS="-p $SIMULTANEOUS" - # Set up environment based on runner. if [[ "$RUNNER" == "dataflow" ]]; then if [[ -z "$DATAFLOW_WORKER_JAR" ]]; then @@ -270,14 +266,14 @@ elif [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || --job-port $JOB_PORT \ --expansion-port 0 \ --artifact-port 0 & - ARGS="-p 1" + SIMULTANEOUS=1 elif [[ "$RUNNER" == "samza" ]]; then java \ -jar $SAMZA_JOB_SERVER_JAR \ --job-port $JOB_PORT \ --expansion-port 0 \ --artifact-port 0 & - ARGS="-p 1" + SIMULTANEOUS=1 elif [[ "$RUNNER" == "spark" ]]; then java \ -jar $SPARK_JOB_SERVER_JAR \ @@ -285,7 +281,7 @@ elif [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || --job-port $JOB_PORT \ --expansion-port 0 \ --artifact-port 0 & - ARGS="-p 1" # Spark runner fails if jobs are run concurrently. + SIMULTANEOUS=1 # Spark runner fails if jobs are run concurrently. elif [[ "$RUNNER" == "portable" ]]; then python3 \ -m apache_beam.runners.portability.local_job_service_main \ @@ -372,6 +368,10 @@ else CONTAINER=apache/beam_go_sdk fi +# The go test flag -p dictates the number of simultaneous test binaries running tests. +# Note that --parallel controls the level of parallelism within a test binary. +ARGS="$ARGS -p $SIMULTANEOUS" + # Assemble test arguments and pipeline options. ARGS="$ARGS --timeout=$TIMEOUT" ARGS="$ARGS --runner=$RUNNER"