From fe1fef4bdb93ac8bc324439981fc7c2f4ae6eea7 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 26 Apr 2020 17:33:45 -0700
Subject: [PATCH 1/7] update nvidia-docker command & remove cuda compat

---
 ci/build.py                       | 3 ++-
 ci/docker/Dockerfile.build.ubuntu | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 323a4487fafc..72dfef7c5876 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -222,8 +222,9 @@ def container_run(docker_client: SafeDockerClient,
 
     # Equivalent command
     docker_cmd_list = [
-        "nvidia-docker" if nvidia_runtime else "docker",
+        "docker",
         'run',
+        "--gpus all" if nvidia_runtime else "",
         "--cap-add",
         "SYS_PTRACE", # Required by ASAN
         '--rm',
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index c44a5cce8138..63a4fd3a005d 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -164,5 +164,4 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
 
 FROM gpu as gpuwithcompatenv
 # TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
\ No newline at end of file
+# This should be fixed and deleted.
\ No newline at end of file

From f761b240025803bae47e51eed830e941ff5b9e7b Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Mon, 27 Apr 2020 19:52:24 -0700
Subject: [PATCH 2/7] replace ubuntu_gpu_cu101 with ubuntu_build_cuda since
 cuda compat is no longer used

---
 ci/jenkins/Jenkins_steps.groovy | 36 ++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 28d76beabab2..b7c2763f7ec2 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu() {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib)
           }
         }
@@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib('cmake_gpu', mx_cmake_lib_cython)
           }
         }
@@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() {
         ws('workspace/build-cmake-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
           }
         }
       }
@@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                     utils.init_git()
-                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                    utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
                 }
             }
         }
@@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib('gpu_clang10', mx_lib)
           }
         }
@@ -831,7 +831,7 @@ def test_unix_python3_gpu() {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() {
         ws('workspace/ut-python3-gpu-no-tvm-op') {
           try {
             utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
+            python3_gpu_ut_cython('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init('gpu', mx_lib)
-              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_gpu_cu101')
+            python3_gpu_ut('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
+            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1192,7 +1192,7 @@ def test_unix_r_gpu() {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
+            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }

From 1c720fad8791a4518b4012de2e3339a7cdff5d74 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Tue, 28 Apr 2020 08:42:51 -0700
Subject: [PATCH 3/7] skip flaky tests

---
 tests/python/unittest/test_numpy_interoperability.py | 2 ++
 tests/python/unittest/test_numpy_ndarray.py          | 1 +
 tests/python/unittest/test_numpy_op.py               | 4 ++++
 3 files changed, 7 insertions(+)

diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 9e94ad303afb..18a54eab6a94 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -3250,6 +3250,7 @@ def test_np_memory_array_function():
         assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0)))
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_function_protocol
@@ -3257,6 +3258,7 @@ def test_np_array_function_protocol():
     check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_ufunc_protocol
diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py
index 98267e9bc650..15607c54781a 100644
--- a/tests/python/unittest/test_numpy_ndarray.py
+++ b/tests/python/unittest/test_numpy_ndarray.py
@@ -259,6 +259,7 @@ def check_identity_array_creation(shape, dtype):
                 assert type(y[1]) == np.ndarray
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 def test_np_ndarray_binary_element_wise_ops():
     np_op_map = {
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 115e76b408cc..612f805b65d0 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -636,6 +636,7 @@ def hybrid_forward(self, F, a, b):
                 assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_sum():
@@ -1029,6 +1030,7 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None):
         assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_mean():
@@ -2220,6 +2222,7 @@ def hybrid_forward(self, F, x):
                 assert same(ret_mx.asnumpy(), ret_np)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_unary_funcs():
@@ -2442,6 +2445,7 @@ def hybrid_forward(self, F, a, *args, **kwargs):
             check_unary_func(func, ref_grad, shape, low, high)
 
 
+@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_binary_funcs():

From ec5330d7c86e770687053480be585a75712ce2a4 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Tue, 28 Apr 2020 15:56:44 -0700
Subject: [PATCH 4/7] get rid of ubuntu_build_cuda and point ubuntu_gpu_cu101
 to base gpu instead of cuda compat

---
 ci/build.py                          |  2 +-
 ci/docker/Dockerfile.build.ubuntu    |  5 ---
 ci/docker/docker-compose.yml         | 13 +-------
 ci/jenkins/Jenkins_steps.groovy      | 48 ++++++++++++++--------------
 tests/nightly/JenkinsfileForBinaries |  2 +-
 5 files changed, 27 insertions(+), 43 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 72dfef7c5876..df2f22e98624 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -45,7 +45,7 @@
 # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose
 DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
                             'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu',
-                            'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
+                            'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
                             'publish.test.centos7_gpu')
 # Files for docker compose
 DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7'))
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index 63a4fd3a005d..7e4b3d08d0bd 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -160,8 +160,3 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     apt install -y  --no-install-recommends \
         cuda-10-1 && \
     rm -rf /var/lib/apt/lists/*
-
-
-FROM gpu as gpuwithcompatenv
-# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
\ No newline at end of file
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 7c645a0c9786..22eab4b6a33a 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -110,23 +110,12 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcompatenv
+      target: gpu
       args:
         BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
       cache_from:
         - build.ubuntu_gpu_cu101:latest
         - mxnetci/build.ubuntu_gpu_cu101:latest
-  ubuntu_build_cuda:
-    image: build.ubuntu_build_cuda:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcudaruntimelibs
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
-      cache_from:
-        - build.ubuntu_build_cuda:latest
-        - mxnetci/build.ubuntu_build_cuda:latest
   ###################################################################################################
   # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
   ###################################################################################################
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index b7c2763f7ec2..14bb91f86088 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -155,7 +155,7 @@ def compile_unix_int64_gpu() {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_large_tensor', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
             utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib)
           }
         }
@@ -225,7 +225,7 @@ def compile_unix_mkldnn_gpu() {
         ws('workspace/build-mkldnn-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
             utils.pack_lib('mkldnn_gpu', mx_mkldnn_lib)
           }
         }
@@ -239,7 +239,7 @@ def compile_unix_mkldnn_nocudnn_gpu() {
          ws('workspace/build-mkldnn-gpu-nocudnn') {
            timeout(time: max_time, unit: 'MINUTES') {
              utils.init_git()
-             utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
+             utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
              utils.pack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
            }
          }
@@ -253,7 +253,7 @@ def compile_unix_full_gpu() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7', false)
             utils.pack_lib('gpu', mx_lib_cpp_examples)
           }
         }
@@ -267,7 +267,7 @@ def compile_unix_full_gpu_make() {
         ws('workspace/build-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_make', false)
             utils.pack_lib('gpu_make', mx_lib_cpp_examples_make)
           }
         }
@@ -281,7 +281,7 @@ def compile_unix_full_gpu_mkldnn_cpp_test() {
         ws('workspace/build-gpu-mkldnn-cpp') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false)
             utils.pack_lib('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
           }
         }
@@ -295,7 +295,7 @@ def compile_unix_full_gpu_no_tvm_op() {
         ws('workspace/build-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
             utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
           }
         }
@@ -309,7 +309,7 @@ def compile_unix_cmake_gpu() {
         ws('workspace/build-cmake-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
             utils.pack_lib('cmake_gpu', mx_cmake_lib_cython)
           }
         }
@@ -323,7 +323,7 @@ def compile_unix_cmake_gpu_no_tvm_op() {
         ws('workspace/build-cmake-gpu-no-tvm-op') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
           }
         }
       }
@@ -336,7 +336,7 @@ def compile_unix_cmake_gpu_no_rtc() {
             ws('workspace/build-cmake-gpu-no-rtc') {
                 timeout(time: max_time, unit: 'MINUTES') {
                     utils.init_git()
-                    utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
                 }
             }
         }
@@ -608,7 +608,7 @@ def compile_unix_clang10_cuda_werror() {
         ws('workspace/build-cpu-clang10') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_clang10_werror', false)
+            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
             utils.pack_lib('gpu_clang10', mx_lib)
           }
         }
@@ -831,7 +831,7 @@ def test_unix_python3_gpu() {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
+            python3_gpu_ut_cython('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -847,7 +847,7 @@ def test_unix_python3_gpu_no_tvm_op() {
         ws('workspace/ut-python3-gpu-no-tvm-op') {
           try {
             utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_build_cuda')
+            python3_gpu_ut_cython('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -864,7 +864,7 @@ def test_unix_python3_quantize_gpu() {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
               utils.unpack_and_init('gpu', mx_lib)
-              utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_python3_quantization_gpu', true)
+              utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true)
               utils.publish_test_coverage()
             } finally {
               utils.collect_test_results_unix('tests_quantization_gpu.xml', 'tests_python3_quantize_gpu.xml')
@@ -947,7 +947,7 @@ def test_unix_python3_mkldnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
-            python3_gpu_ut('ubuntu_build_cuda')
+            python3_gpu_ut('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -963,7 +963,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
-            python3_gpu_ut_nocudnn('ubuntu_build_cuda')
+            python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
             utils.publish_test_coverage()
           } finally {
             utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -997,7 +997,7 @@ def test_unix_python3_integration_gpu() {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_python', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true)
             utils.publish_test_coverage()
           }
         }
@@ -1011,7 +1011,7 @@ def test_unix_cpp_package_gpu() {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_cpp_examples_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1025,7 +1025,7 @@ def test_unix_capi_cpp_package() {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test_make', mx_lib_cpp_capi_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_capi_cpp_package', true)
             utils.publish_test_coverage()
           }
         }
@@ -1067,7 +1067,7 @@ def test_unix_scala_gpu() {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_scala', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true)
             utils.publish_test_coverage()
           }
         }
@@ -1150,7 +1150,7 @@ def test_unix_cpp_gpu() {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_cpp', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true)
             utils.publish_test_coverage()
           }
         }
@@ -1178,7 +1178,7 @@ def test_unix_perl_gpu() {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_make', mx_lib_make)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_cpugpu_perl', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true)
             utils.publish_test_coverage()
           }
         }
@@ -1192,7 +1192,7 @@ def test_unix_r_gpu() {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'unittest_ubuntu_gpu_R', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true)
             utils.publish_test_coverage()
           }
         }
@@ -1260,7 +1260,7 @@ def test_unix_distributed_kvstore_gpu() {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
-            utils.docker_run('ubuntu_build_cuda', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+            utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
             utils.publish_test_coverage()
           }
         }
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 81c022c28990..3f79ebdb7940 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -34,7 +34,7 @@ core_logic: {
       node(NODE_LINUX_CPU) {
         ws('workspace/build-mkldnn-gpu') {
           utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
+          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
           utils.pack_lib('gpu', mx_lib)
         }
       }

From 36f556339ef9291da624b1b837d49b524fbf2831 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Thu, 7 May 2020 09:55:35 -0700
Subject: [PATCH 5/7] Revert "skip flaky tests"

This reverts commit 1c720fad8791a4518b4012de2e3339a7cdff5d74.
---
 tests/python/unittest/test_numpy_interoperability.py | 2 --
 tests/python/unittest/test_numpy_ndarray.py          | 1 -
 tests/python/unittest/test_numpy_op.py               | 4 ----
 3 files changed, 7 deletions(-)

diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 18a54eab6a94..9e94ad303afb 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -3250,7 +3250,6 @@ def test_np_memory_array_function():
         assert op(data_mx, np.ones((5, 0))) == op(data_np, _np.ones((5, 0)))
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_function_protocol
@@ -3258,7 +3257,6 @@ def test_np_array_function_protocol():
     check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 @with_array_ufunc_protocol
diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py
index 15607c54781a..98267e9bc650 100644
--- a/tests/python/unittest/test_numpy_ndarray.py
+++ b/tests/python/unittest/test_numpy_ndarray.py
@@ -259,7 +259,6 @@ def check_identity_array_creation(shape, dtype):
                 assert type(y[1]) == np.ndarray
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 def test_np_ndarray_binary_element_wise_ops():
     np_op_map = {
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 612f805b65d0..115e76b408cc 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -636,7 +636,6 @@ def hybrid_forward(self, F, a, b):
                 assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_sum():
@@ -1030,7 +1029,6 @@ def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None):
         assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_mean():
@@ -2222,7 +2220,6 @@ def hybrid_forward(self, F, x):
                 assert same(ret_mx.asnumpy(), ret_np)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_unary_funcs():
@@ -2445,7 +2442,6 @@ def hybrid_forward(self, F, a, *args, **kwargs):
             check_unary_func(func, ref_grad, shape, low, high)
 
 
-@unittest.skip("Flaky test https://github.com/apache/incubator-mxnet/issues/17840")
 @with_seed()
 @use_np
 def test_np_binary_funcs():

From 386c42bdd3314a4c559089ccbd2445360abc881d Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 9 May 2020 19:56:19 -0700
Subject: [PATCH 6/7] revert removal of ubuntu_build_cuda

---
 ci/build.py                          | 2 +-
 tests/nightly/JenkinsfileForBinaries | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 46497f11cbff..24acd8fa4db3 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -45,7 +45,7 @@
 # NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose
 DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
                             'centos7_gpu_cu101', 'centos7_gpu_cu102', 'ubuntu_cpu',
-                            'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
+                            'ubuntu_build_cuda', 'ubuntu_gpu_cu101', 'publish.test.centos7_cpu',
                             'publish.test.centos7_gpu')
 # Files for docker compose
 DOCKER_COMPOSE_FILES = set(('docker/build.centos7', 'docker/build.ubuntu', 'docker/publish.test.centos7'))
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index a524304afe75..be65398e14da 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -34,7 +34,7 @@ core_logic: {
       node(NODE_LINUX_CPU) {
         ws('workspace/build-mkldnn-gpu') {
           utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_mkldnn', false)
+          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
           utils.pack_lib('gpu', mx_lib)
         }
       }

From 2b05566c12956d82f48a03b9cabb24667d962a64 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sat, 9 May 2020 20:04:50 -0700
Subject: [PATCH 7/7] add linux gpu g4 node to all steps using g3 in unix-gpu
 pipeline

---
 ci/Jenkinsfile_utils.groovy     |  1 +
 ci/jenkins/Jenkins_steps.groovy | 24 ++++++++++++------------
 ci/jenkins/Jenkinsfile_unix_gpu |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index 4954963ff9c6..ee254b0dbee9 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -257,6 +257,7 @@ def assign_node_labels(args) {
   //    knowing about the limitations.
   NODE_LINUX_CPU = args.linux_cpu
   NODE_LINUX_GPU = args.linux_gpu
+  NODE_LINUX_GPU_G4 = args.linux_gpu_g4
   NODE_LINUX_GPU_P3 = args.linux_gpu_p3
   NODE_WINDOWS_CPU = args.windows_cpu
   NODE_WINDOWS_GPU = args.windows_gpu
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 204567d730c0..20024d2e47c7 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -151,7 +151,7 @@ def compile_unix_int64_cpu(lib_name) {
 
 def compile_unix_int64_gpu(lib_name) {
     return ['GPU: USE_INT64_TENSOR_SIZE': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
@@ -816,7 +816,7 @@ def test_unix_python3_mkl_cpu(lib_name) {
 
 def test_unix_python3_gpu(lib_name) {
     return ['Python3: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_lib_cython)
@@ -916,7 +916,7 @@ def test_unix_python3_mkldnn_mkl_cpu(lib_name) {
 
 def test_unix_python3_mkldnn_gpu(lib_name) {
     return ['Python3: MKLDNN-GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
@@ -932,7 +932,7 @@ def test_unix_python3_mkldnn_gpu(lib_name) {
 
 def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
     return ['Python3: MKLDNN-GPU-NOCUDNN': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
@@ -966,7 +966,7 @@ def test_unix_python3_tensorrt_gpu(lib_name) {
 
 def test_unix_python3_integration_gpu(lib_name) {
     return ['Python Integration GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
@@ -980,7 +980,7 @@ def test_unix_python3_integration_gpu(lib_name) {
 
 def test_unix_cpp_package_gpu(lib_name) {
     return ['cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_examples_make)
@@ -994,7 +994,7 @@ def test_unix_cpp_package_gpu(lib_name) {
 
 def test_unix_capi_cpp_package(lib_name) {
     return ['capi-cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_capi_make)
@@ -1036,7 +1036,7 @@ def test_unix_scala_mkldnn_cpu(lib_name){
 
 def test_unix_scala_gpu(lib_name) {
     return ['Scala: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
@@ -1119,7 +1119,7 @@ def test_unix_perl_cpu(lib_name) {
 
 def test_unix_cpp_gpu(lib_name) {
     return ['Cpp: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_cmake_lib)
@@ -1147,7 +1147,7 @@ def test_unix_cpp_cpu(lib_name) {
 
 def test_unix_perl_gpu(lib_name) {
     return ['Perl: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
@@ -1161,7 +1161,7 @@ def test_unix_perl_gpu(lib_name) {
 
 def test_unix_r_gpu(lib_name) {
     return ['R: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
@@ -1229,7 +1229,7 @@ def test_unix_distributed_kvstore_cpu(lib_name) {
 
 def test_unix_distributed_kvstore_gpu(lib_name) {
     return ['dist-kvstore tests GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index ece2bbb2d277..805eba83e50a 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -29,7 +29,7 @@ node('utility') {
   utils = load('ci/Jenkinsfile_utils.groovy')
   custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
 }
-utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3')
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4')
 
 utils.main_wrapper(
 core_logic: {