From f1c142214584a645434adeeb8f0fc897ac0db604 Mon Sep 17 00:00:00 2001 From: driazati Date: Tue, 4 Jan 2022 13:12:37 -0800 Subject: [PATCH 1/2] use xdist --- Jenkinsfile | 8 +++----- python/tvm/testing/plugin.py | 17 +++++++++++++++ tests/python/relay/test_op_grad_level4.py | 1 + tests/scripts/setup-pytest-env.sh | 23 ++++++++++++++++++++- tests/scripts/task_ci_setup.sh | 2 +- tests/scripts/task_python_integration.sh | 4 ++-- tests/scripts/task_show_node_info.sh | 25 +++++++++++++++++++++++ 7 files changed, 71 insertions(+), 9 deletions(-) create mode 100755 tests/scripts/task_show_node_info.sh diff --git a/Jenkinsfile b/Jenkinsfile index ef13eb85c29a..decc92a47614 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -91,17 +91,15 @@ def per_exec_ws(folder) { // initialize source codes def init_git() { + checkout scm // Add more info about job node sh ( - script: """ - echo "INFO: NODE_NAME=${NODE_NAME} EXECUTOR_NUMBER=${EXECUTOR_NUMBER}" - """, + script: './tests/scripts/task_show_node_info.sh', label: "Show executor node info", ) - checkout scm retry(5) { timeout(time: 2, unit: 'MINUTES') { - sh (script: 'git submodule update --init -f', label: "Update git submodules") + sh(script: 'git submodule update --init -f', label: "Update git submodules") } } } diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index e90bd5e6dbf5..3db1ae518bd8 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -34,6 +34,11 @@ import pytest import _pytest +try: + from xdist.scheduler.loadscope import LoadScopeScheduling +except ImportError: + pass + import tvm from tvm.testing import utils @@ -318,3 +323,15 @@ def _parametrize_correlated_parameters(metafunc): names = ",".join(name for name, values in params) value_sets = zip(*[values for name, values in params]) metafunc.parametrize(names, value_sets, indirect=True, ids=ids) + + +def pytest_xdist_make_scheduler(config, log): + class TvmTestScheduler(LoadScopeScheduling): + def _split_scope(self, nodeid): + # NOTE: test_tvm_testing_features contains parametrization-related tests, and must be + # serialized on a single host. + if "test_tvm_testing_features" in nodeid: + return "functional-tests" + return nodeid + + return TvmTestScheduler(config, log) diff --git a/tests/python/relay/test_op_grad_level4.py b/tests/python/relay/test_op_grad_level4.py index 17d30cacac41..d535522f44b8 100644 --- a/tests/python/relay/test_op_grad_level4.py +++ b/tests/python/relay/test_op_grad_level4.py @@ -45,6 +45,7 @@ def verify_max_grad(d_shape, axis=None, keepdims=False, exclude=False): check_grad(fwd_func, scale=1e-3) +@pytest.mark.forked def test_max_grad(): verify_max_grad((10, 10), axis=None) verify_max_grad((10, 10), axis=-1) diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index bcd27a16f659..d86a60e251bd 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -26,6 +26,8 @@ else fi set -u +set -x + export TVM_PATH=`pwd` export PYTHONPATH="${TVM_PATH}/python" @@ -37,14 +39,33 @@ function run_pytest() { shift local test_suite_name="$1" shift - if [ -z "${ffi_type}" -o -z "${test_suite_name}" ]; then + + # Default to -n0 (which disables pytest-xdist entirely), but use PYTEST_CPUS + # if it was set manually by the caller + xdist_cpus="-n${PYTEST_CPUS:-0}" + if [[ -n ${PYTEST_CPUS+x} ]] && [ "$PYTEST_CPUS" == "auto" ]; then + # If PYTEST_CPUS is set to 'auto', use max(nproc - 1, 0) CPUs + num_cpus="$(nproc)" + + if [ "$num_cpus" -gt 3 ]; then + # Limit concurrency to 2 jobs max in CI + num_cpus=3 + fi + + xdist_cpus=-n$((num_cpus == 1 ? 1 : num_cpus - 1)) + fi + + if [ -z "${ffi_type}" ] || [ -z "${test_suite_name}" ]; then echo "error: run_pytest called incorrectly: run_pytest ${ffi_type} ${test_suite_name} $@" echo "usage: run_pytest [pytest args...]" exit 2 fi + echo "running with $xdist_cpus" TVM_FFI=${ffi_type} python3 -m pytest \ -o "junit_suite_name=${test_suite_name}-${ffi_type}" \ "--junit-xml=${TVM_PYTEST_RESULT_DIR}/${test_suite_name}-${ffi_type}.xml" \ "--junit-prefix=${ffi_type}" \ + "--durations=20" \ + "$xdist_cpus" \ "$@" } diff --git a/tests/scripts/task_ci_setup.sh b/tests/scripts/task_ci_setup.sh index 33ea484a5268..ac2901d9fc5e 100755 --- a/tests/scripts/task_ci_setup.sh +++ b/tests/scripts/task_ci_setup.sh @@ -30,7 +30,7 @@ set -o pipefail # echo "Additional setup in ${CI_IMAGE_NAME}" -python3 -m pip install --user tlcpack-sphinx-addon==0.2.1 synr==0.6.0 +python3 -m pip install --user tlcpack-sphinx-addon==0.2.1 synr==0.6.0 pytest-forked # Rebuild standalone_crt in build/ tree. This file is not currently archived by pack_lib() in # Jenkinsfile. We expect config.cmake to be present from pack_lib(). diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 4992bfa678de..48108db8cb81 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -16,8 +16,7 @@ # specific language governing permissions and limitations # under the License. -set -e -set -u +set -euxo pipefail source tests/scripts/setup-pytest-env.sh export PYTHONPATH=${PYTHONPATH}:${TVM_PATH}/apps/extension/python @@ -26,6 +25,7 @@ export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}" # to avoid CI CPU thread throttling. export TVM_BIND_THREADS=0 export TVM_NUM_THREADS=2 +export PYTEST_CPUS=auto # NOTE: also set by task_python_integration_gpuonly.sh. if [ -z "${TVM_INTEGRATION_TESTSUITE_NAME:-}" ]; then diff --git a/tests/scripts/task_show_node_info.sh b/tests/scripts/task_show_node_info.sh new file mode 100755 index 000000000000..673b99e9f99c --- /dev/null +++ b/tests/scripts/task_show_node_info.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euxo pipefail + +echo "INFO: NODE_NAME=${NODE_NAME} EXECUTOR_NUMBER=${EXECUTOR_NUMBER}" +lsb_release -a || echo "could not get lsb_release" +pwd +git show --quiet HEAD || echo "could not get HEAD commit" +lscpu || echo "could not lscpu" From 18c18090d014845a59bf1da95a0dd9baf543ab4a Mon Sep 17 00:00:00 2001 From: driazati Date: Tue, 11 Jan 2022 16:49:27 -0800 Subject: [PATCH 2/2] test --- python/tvm/testing/plugin.py | 32 ++++++++++++++------- tests/python/relay/test_vm_serialization.py | 1 + tests/scripts/task_python_integration.sh | 3 ++ 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index 3db1ae518bd8..ce1d0798f703 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -36,8 +36,10 @@ try: from xdist.scheduler.loadscope import LoadScopeScheduling + + HAVE_XDIST = True except ImportError: - pass + HAVE_XDIST = False import tvm from tvm.testing import utils @@ -325,13 +327,21 @@ def _parametrize_correlated_parameters(metafunc): metafunc.parametrize(names, value_sets, indirect=True, ids=ids) -def pytest_xdist_make_scheduler(config, log): - class TvmTestScheduler(LoadScopeScheduling): - def _split_scope(self, nodeid): - # NOTE: test_tvm_testing_features contains parametrization-related tests, and must be - # serialized on a single host. - if "test_tvm_testing_features" in nodeid: - return "functional-tests" - return nodeid - - return TvmTestScheduler(config, log) +if HAVE_XDIST: + # We need to guard the declaration of this function otherwise pytest + # errors out if pytest-xdist is not installed + def pytest_xdist_make_scheduler(config, log): + """ + Serialize certain tests for pytest-xdist + """ + + class TvmTestScheduler(LoadScopeScheduling): + def _split_scope(self, nodeid): + # NOTE: test_tvm_testing_features contains + # parametrization-related tests, and must be serialized on a + # single host. + if "test_tvm_testing_features" in nodeid: + return "functional-tests" + return nodeid + + return TvmTestScheduler(config, log) diff --git a/tests/python/relay/test_vm_serialization.py b/tests/python/relay/test_vm_serialization.py index 1a49fc5a0184..e9d50c8b7eaa 100644 --- a/tests/python/relay/test_vm_serialization.py +++ b/tests/python/relay/test_vm_serialization.py @@ -301,6 +301,7 @@ def test_vm_shape_of(): tvm.testing.assert_allclose(res.flatten(), data.flatten()) +@pytest.mark.forked def test_dynamic_bcast(): dtype = "float32" x = relay.var("x", shape=(relay.Any(), 2), dtype=dtype) diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 48108db8cb81..171367331b4c 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -65,9 +65,12 @@ run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosu tests/python/contrib/test_ethosu -n auto +# disable pytest-xdist for relay tests due to deadlocks +# export PYTEST_CPUS=0 # forked is needed because the global registry gets contaminated TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \ run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay +# export PYTEST_CPUS=auto # Command line driver test run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver