From fe1fef4bdb93ac8bc324439981fc7c2f4ae6eea7 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Sun, 26 Apr 2020 17:33:45 -0700 Subject: [PATCH 1/7] update nvidiadocker command & remove cuda compat --- ci/build.py | 3 ++- ci/docker/Dockerfile.build.ubuntu | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/build.py b/ci/build.py index 323a4487fafc..72dfef7c5876 100755 --- a/ci/build.py +++ b/ci/build.py @@ -222,8 +222,9 @@ def container_run(docker_client: SafeDockerClient, # Equivalent command docker_cmd_list = [ - "nvidia-docker" if nvidia_runtime else "docker", + "docker", 'run', + "--gpus all" if nvidia_runtime else "", "--cap-add", "SYS_PTRACE", # Required by ASAN '--rm', diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu index c44a5cce8138..63a4fd3a005d 100644 --- a/ci/docker/Dockerfile.build.ubuntu +++ b/ci/docker/Dockerfile.build.ubuntu @@ -164,5 +164,4 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ FROM gpu as gpuwithcompatenv # TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH. -# This should be fixed and deleted. -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat \ No newline at end of file +# This should be fixed and deleted. \ No newline at end of file From f761b240025803bae47e51eed830e941ff5b9e7b Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Mon, 27 Apr 2020 19:52:24 -0700 Subject: [PATCH 2/7] replace cu101 with cuda since compat is no longer to be used --- ci/jenkins/Jenkins_steps.groovy | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 28d76beabab2..b7c2763f7ec2 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -155,7 +155,7 @@ def compile_unix_int64_gpu() { ws('workspace/build-gpu-int64') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false) utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib) } } @@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() { ws('workspace/build-cmake-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false) utils.pack_lib('cmake_gpu', mx_cmake_lib_cython) } } @@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() { ws('workspace/build-cmake-gpu-no-tvm-op') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false) } } } @@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() { ws('workspace/build-cmake-gpu-no-rtc') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false) } } } @@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() { ws('workspace/build-cpu-clang10') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false) utils.pack_lib('gpu_clang10', mx_lib) } } @@ -831,7 +831,7 @@ def test_unix_python3_gpu() { ws('workspace/ut-python3-gpu') { try { utils.unpack_and_init('gpu', mx_lib_cython) - python3_gpu_ut_cython('ubuntu_gpu_cu101') + python3_gpu_ut_cython('ubuntu_build_cuda') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml') @@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() { ws('workspace/ut-python3-gpu-no-tvm-op') { try { utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) - python3_gpu_ut_cython('ubuntu_gpu_cu101') + python3_gpu_ut_cython('ubuntu_build_cuda') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml') @@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true) + utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml') @@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu') { try { utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib) - python3_gpu_ut('ubuntu_gpu_cu101') + python3_gpu_ut('ubuntu_build_cuda') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml') @@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu-nocudnn') { try { utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib) - python3_gpu_ut_nocudnn('ubuntu_gpu_cu101') + python3_gpu_ut_nocudnn('ubuntu_build_cuda') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml') @@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() { ws('workspace/it-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true) + utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true) utils.publish_test_coverage() } } @@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() { ws('workspace/it-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true) + utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true) utils.publish_test_coverage() } } @@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() { ws('workspace/it-capi-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true) + utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true) utils.publish_test_coverage() } } @@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() { ws('workspace/ut-scala-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_make) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true) + utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true) utils.publish_test_coverage() } } @@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() { ws('workspace/ut-cpp-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cmake_gpu', mx_cmake_lib) - utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true) + utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true) utils.publish_test_coverage() } } @@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() { ws('workspace/ut-perl-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_make) - utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true) + utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true) utils.publish_test_coverage() } } @@ -1192,7 +1192,7 @@ def test_unix_r_gpu() { ws('workspace/ut-r-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true) + utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true) utils.publish_test_coverage() } } @@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() { ws('workspace/it-dist-kvstore') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true) + utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true) utils.publish_test_coverage() } } From 1c720fad8791a4518b4012de2e3339a7cdff5d74 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Tue, 28 Apr 2020 08:42:51 -0700 Subject: [PATCH 3/7] skip flaky tests --- tests/python/unittest/test_numpy_interoperability.py | 2 ++ tests/python/unittest/test_numpy_ndarray.py | 1 + tests/python/unittest/test_numpy_op.py | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 9e94ad303afb..18a54eab6a94 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -3250,6 +3250,7 @@ def test_np_memory_array_function(): assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0))) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np @with_array_function_protocol @@ -3257,6 +3258,7 @@ def test_np_array_function_protocol(): check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np @with_array_ufunc_protocol diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py index 98267e9bc650..15607c54781a 100644 --- a/tests/python/unittest/test_numpy_ndarray.py +++ b/tests/python/unittest/test_numpy_ndarray.py @@ -259,6 +259,7 @@ def check_identity_array_creation(shape, dtype): assert type(y[1]) == np.ndarray +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() def test_np_ndarray_binary_element_wise_ops(): np_op_map = { diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 115e76b408cc..612f805b65d0 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -636,6 +636,7 @@ def hybrid_forward(self, F, a, b): assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_sum(): @@ -1029,6 +1030,7 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None): assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_mean(): @@ -2220,6 +2222,7 @@ def hybrid_forward(self, F, x): assert same(ret_mx.asnumpy(), ret_np) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_unary_funcs(): @@ -2442,6 +2445,7 @@ def hybrid_forward(self, F, a, *args, **kwargs): check_unary_func(func, ref_grad, shape, low, high) +@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_binary_funcs(): From ec5330d7c86e770687053480be585a75712ce2a4 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Tue, 28 Apr 2020 15:56:44 -0700 Subject: [PATCH 4/7] get rid of ubuntu_build_cuda and point ubuntu_cu101 to base gpu instead of cuda compat --- ci/build.py | 2 +- ci/docker/Dockerfile.build.ubuntu | 5 --- ci/docker/docker-compose.yml | 13 +------- ci/jenkins/Jenkins_steps.groovy | 48 ++++++++++++++-------------- tests/nightly/JenkinsfileForBinaries | 2 +- 5 files changed, 27 insertions(+), 43 deletions(-) diff --git a/ci/build.py b/ci/build.py index 72dfef7c5876..df2f22e98624 100755 --- a/ci/build.py +++ b/ci/build.py @@ -45,7 +45,7 @@ # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100', 'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu', - 'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu', + 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu', 'publish.test.centos7_gpu') # Files for docker compose DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7')) diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu index 63a4fd3a005d..7e4b3d08d0bd 100644 --- a/ci/docker/Dockerfile.build.ubuntu +++ b/ci/docker/Dockerfile.build.ubuntu @@ -160,8 +160,3 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt install -y --no-install-recommends \ cuda-10-1 && \ rm -rf /var/lib/apt/lists/* - - -FROM gpu as gpuwithcompatenv -# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH. -# This should be fixed and deleted. \ No newline at end of file diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml index 7c645a0c9786..22eab4b6a33a 100644 --- a/ci/docker/docker-compose.yml +++ b/ci/docker/docker-compose.yml @@ -110,23 +110,12 @@ services: build: context: . dockerfile: Dockerfile.build.ubuntu - target: gpuwithcompatenv + target: gpu args: BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 cache_from: - build.ubuntu_gpu_cu101:latest - mxnetci/build.ubuntu_gpu_cu101:latest - ubuntu_build_cuda: - image: build.ubuntu_build_cuda:latest - build: - context: . - dockerfile: Dockerfile.build.ubuntu - target: gpuwithcudaruntimelibs - args: - BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 - cache_from: - - build.ubuntu_build_cuda:latest - - mxnetci/build.ubuntu_build_cuda:latest ################################################################################################### # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems. ################################################################################################### diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index b7c2763f7ec2..14bb91f86088 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -155,7 +155,7 @@ def compile_unix_int64_gpu() { ws('workspace/build-gpu-int64') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false) utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib) } } @@ -225,7 +225,7 @@ def compile_unix_mkldnn_gpu() { ws('workspace/build-mkldnn-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false) utils.pack_lib('mkldnn_gpu', mx_mkldnn_lib) } } @@ -239,7 +239,7 @@ def compile_unix_mkldnn_nocudnn_gpu() { ws('workspace/build-mkldnn-gpu-nocudnn') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn_nocudnn', false) utils.pack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib) } } @@ -253,7 +253,7 @@ def compile_unix_full_gpu() { ws('workspace/build-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7', false) utils.pack_lib('gpu', mx_lib_cpp_examples) } } @@ -267,7 +267,7 @@ def compile_unix_full_gpu_make() { ws('workspace/build-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_make', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_make', false) utils.pack_lib('gpu_make', mx_lib_cpp_examples_make) } } @@ -281,7 +281,7 @@ def compile_unix_full_gpu_mkldnn_cpp_test() { ws('workspace/build-gpu-mkldnn-cpp') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false) utils.pack_lib('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make) } } @@ -295,7 +295,7 @@ def compile_unix_full_gpu_no_tvm_op() { ws('workspace/build-gpu-no-tvm-op') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false) utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) } } @@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() { ws('workspace/build-cmake-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false) utils.pack_lib('cmake_gpu', mx_cmake_lib_cython) } } @@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() { ws('workspace/build-cmake-gpu-no-tvm-op') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false) } } } @@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() { ws('workspace/build-cmake-gpu-no-rtc') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false) } } } @@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() { ws('workspace/build-cpu-clang10') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false) utils.pack_lib('gpu_clang10', mx_lib) } } @@ -831,7 +831,7 @@ def test_unix_python3_gpu() { ws('workspace/ut-python3-gpu') { try { utils.unpack_and_init('gpu', mx_lib_cython) - python3_gpu_ut_cython('ubuntu_build_cuda') + python3_gpu_ut_cython('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml') @@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() { ws('workspace/ut-python3-gpu-no-tvm-op') { try { utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) - python3_gpu_ut_cython('ubuntu_build_cuda') + python3_gpu_ut_cython('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml') @@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml') @@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu') { try { utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib) - python3_gpu_ut('ubuntu_build_cuda') + python3_gpu_ut('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml') @@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu-nocudnn') { try { utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib) - python3_gpu_ut_nocudnn('ubuntu_build_cuda') + python3_gpu_ut_nocudnn('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml') @@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() { ws('workspace/it-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true) utils.publish_test_coverage() } } @@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() { ws('workspace/it-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make) - utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true) utils.publish_test_coverage() } } @@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() { ws('workspace/it-capi-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make) - utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true) utils.publish_test_coverage() } } @@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() { ws('workspace/ut-scala-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_make) - utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true) utils.publish_test_coverage() } } @@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() { ws('workspace/ut-cpp-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cmake_gpu', mx_cmake_lib) - utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true) utils.publish_test_coverage() } } @@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() { ws('workspace/ut-perl-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu_make', mx_lib_make) - utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true) utils.publish_test_coverage() } } @@ -1192,7 +1192,7 @@ def test_unix_r_gpu() { ws('workspace/ut-r-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true) utils.publish_test_coverage() } } @@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() { ws('workspace/it-dist-kvstore') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib) - utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true) utils.publish_test_coverage() } } diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index 81c022c28990..3f79ebdb7940 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -34,7 +34,7 @@ core_logic: { node(NODE_LINUX_CPU) { ws('workspace/build-mkldnn-gpu') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false) utils.pack_lib('gpu', mx_lib) } } From 36f556339ef9291da624b1b837d49b524fbf2831 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Thu, 7 May 2020 09:55:35 -0700 Subject: [PATCH 5/7] Revert "skip flaky tests" This reverts commit 1c720fad8791a4518b4012de2e3339a7cdff5d74. --- tests/python/unittest/test_numpy_interoperability.py | 2 -- tests/python/unittest/test_numpy_ndarray.py | 1 - tests/python/unittest/test_numpy_op.py | 4 ---- 3 files changed, 7 deletions(-) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 18a54eab6a94..9e94ad303afb 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -3250,7 +3250,6 @@ def test_np_memory_array_function(): assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0))) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np @with_array_function_protocol @@ -3258,7 +3257,6 @@ def test_np_array_function_protocol(): check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np @with_array_ufunc_protocol diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py index 15607c54781a..98267e9bc650 100644 --- a/tests/python/unittest/test_numpy_ndarray.py +++ b/tests/python/unittest/test_numpy_ndarray.py @@ -259,7 +259,6 @@ def check_identity_array_creation(shape, dtype): assert type(y[1]) == np.ndarray -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() def test_np_ndarray_binary_element_wise_ops(): np_op_map = { diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 612f805b65d0..115e76b408cc 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -636,7 +636,6 @@ def hybrid_forward(self, F, a, b): assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_sum(): @@ -1030,7 +1029,6 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None): assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_mean(): @@ -2222,7 +2220,6 @@ def hybrid_forward(self, F, x): assert same(ret_mx.asnumpy(), ret_np) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_unary_funcs(): @@ -2445,7 +2442,6 @@ def hybrid_forward(self, F, a, *args, **kwargs): check_unary_func(func, ref_grad, shape, low, high) -@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840") @with_seed() @use_np def test_np_binary_funcs(): From 386c42bdd3314a4c559089ccbd2445360abc881d Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Sat, 9 May 2020 19:56:19 -0700 Subject: [PATCH 6/7] revert removal of ubuntu_build_cuda --- ci/build.py | 2 +- tests/nightly/JenkinsfileForBinaries | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/build.py b/ci/build.py index 46497f11cbff..24acd8fa4db3 100755 --- a/ci/build.py +++ b/ci/build.py @@ -45,7 +45,7 @@ # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100', 'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu', - 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu', + 'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu', 'publish.test.centos7_gpu') # Files for docker compose DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7')) diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index a524304afe75..be65398e14da 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -34,7 +34,7 @@ core_logic: { node(NODE_LINUX_CPU) { ws('workspace/build-mkldnn-gpu') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false) utils.pack_lib('gpu', mx_lib) } } From 2b05566c12956d82f48a03b9cabb24667d962a64 Mon Sep 17 00:00:00 2001 From: ChaiBapchya Date: Sat, 9 May 2020 20:04:50 -0700 Subject: [PATCH 7/7] add linux gpu g4 node to all steps using g3 in unix-gpu pipeline --- ci/Jenkinsfile_utils.groovy | 1 + ci/jenkins/Jenkins_steps.groovy | 24 ++++++++++++------------ ci/jenkins/Jenkinsfile_unix_gpu | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy index 4954963ff9c6..ee254b0dbee9 100644 --- a/ci/Jenkinsfile_utils.groovy +++ b/ci/Jenkinsfile_utils.groovy @@ -257,6 +257,7 @@ def assign_node_labels(args) { // knowing about the limitations. NODE_LINUX_CPU = args.linux_cpu NODE_LINUX_GPU = args.linux_gpu + NODE_LINUX_GPU_G4 = args.linux_gpu_g4 NODE_LINUX_GPU_P3 = args.linux_gpu_p3 NODE_WINDOWS_CPU = args.windows_cpu NODE_WINDOWS_GPU = args.windows_gpu diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 204567d730c0..20024d2e47c7 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -151,7 +151,7 @@ def compile_unix_int64_cpu(lib_name) { def compile_unix_int64_gpu(lib_name) { return ['GPU: USE_INT64_TENSOR_SIZE': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/build-gpu-int64') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() @@ -816,7 +816,7 @@ def test_unix_python3_mkl_cpu(lib_name) { def test_unix_python3_gpu(lib_name) { return ['Python3: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-gpu') { try { utils.unpack_and_init(lib_name, mx_lib_cython) @@ -916,7 +916,7 @@ def test_unix_python3_mkldnn_mkl_cpu(lib_name) { def test_unix_python3_mkldnn_gpu(lib_name) { return ['Python3: MKLDNN-GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-mkldnn-gpu') { try { utils.unpack_and_init(lib_name, mx_mkldnn_lib) @@ -932,7 +932,7 @@ def test_unix_python3_mkldnn_gpu(lib_name) { def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) { return ['Python3: MKLDNN-GPU-NOCUDNN': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-python3-mkldnn-gpu-nocudnn') { try { utils.unpack_and_init(lib_name, mx_mkldnn_lib) @@ -966,7 +966,7 @@ def test_unix_python3_tensorrt_gpu(lib_name) { def test_unix_python3_integration_gpu(lib_name) { return ['Python Integration GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib) @@ -980,7 +980,7 @@ def test_unix_python3_integration_gpu(lib_name) { def test_unix_cpp_package_gpu(lib_name) { return ['cpp-package GPU Makefile': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib_cpp_examples_make) @@ -994,7 +994,7 @@ def test_unix_cpp_package_gpu(lib_name) { def test_unix_capi_cpp_package(lib_name) { return ['capi-cpp-package GPU Makefile': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-capi-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib_cpp_capi_make) @@ -1036,7 +1036,7 @@ def test_unix_scala_mkldnn_cpu(lib_name){ def test_unix_scala_gpu(lib_name) { return ['Scala: GPU Makefile': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-scala-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib_make) @@ -1119,7 +1119,7 @@ def test_unix_perl_cpu(lib_name) { def test_unix_cpp_gpu(lib_name) { return ['Cpp: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-cpp-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_cmake_lib) @@ -1147,7 +1147,7 @@ def test_unix_cpp_cpu(lib_name) { def test_unix_perl_gpu(lib_name) { return ['Perl: GPU Makefile': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-perl-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib_make) @@ -1161,7 +1161,7 @@ def test_unix_perl_gpu(lib_name) { def test_unix_r_gpu(lib_name) { return ['R: GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/ut-r-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib) @@ -1229,7 +1229,7 @@ def test_unix_distributed_kvstore_cpu(lib_name) { def test_unix_distributed_kvstore_gpu(lib_name) { return ['dist-kvstore tests GPU': { - node(NODE_LINUX_GPU) { + node(NODE_LINUX_GPU_G4) { ws('workspace/it-dist-kvstore') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init(lib_name, mx_lib) diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index ece2bbb2d277..805eba83e50a 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -29,7 +29,7 @@ node('utility') { utils = load('ci/Jenkinsfile_utils.groovy') custom_steps = load('ci/jenkins/Jenkins_steps.groovy') } -utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3') +utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4') utils.main_wrapper( core_logic: {