From 8dcae3a1de32e764d83a88f05f54d67492fe72c6 Mon Sep 17 00:00:00 2001
From: Chaitanya Prakash Bapat <chai.bapat@gmail.com>
Date: Mon, 11 May 2020 19:25:02 -0700
Subject: [PATCH 1/9] Update unix gpu toolchain (#18186)

* update nvidia-docker command & remove cuda compat

* replace cu101 with cuda since cuda compat is no longer used

* skip flaky tests

* get rid of ubuntu_build_cuda and point ubuntu_cu101 to base gpu instead of cuda compat

* Revert "skip flaky tests"

This reverts commit 1c720fad8791a4518b4012de2e3339a7cdff5d74.

* revert removal of ubuntu_build_cuda

* add linux gpu g4 node to all steps using g3 in unix-gpu pipeline
---
 ci/Jenkinsfile_utils.groovy       |   1 +
 ci/build.py                       |   3 +-
 ci/docker/Dockerfile.build.ubuntu | 166 ++++++++++++++++++++++++
 ci/docker/docker-compose.yml      | 208 ++++++++++++++++++++++++++++++
 ci/jenkins/Jenkins_steps.groovy   |  47 +++----
 ci/jenkins/Jenkinsfile_unix_gpu   |   2 +-
 6 files changed, 394 insertions(+), 33 deletions(-)
 create mode 100644 ci/docker/Dockerfile.build.ubuntu
 create mode 100644 ci/docker/docker-compose.yml

diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index e7aeae935d06..8ecc7e193b97 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -255,6 +255,7 @@ def assign_node_labels(args) {
   //    knowing about the limitations.
   NODE_LINUX_CPU = args.linux_cpu
   NODE_LINUX_GPU = args.linux_gpu
+  NODE_LINUX_GPU_G4 = args.linux_gpu_g4
   NODE_LINUX_GPU_P3 = args.linux_gpu_p3
   NODE_WINDOWS_CPU = args.windows_cpu
   NODE_WINDOWS_GPU = args.windows_gpu
diff --git a/ci/build.py b/ci/build.py
index a21ec44942a8..3224dbbce957 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -193,8 +193,9 @@ def container_run(docker_client: SafeDockerClient,
 
     # Equivalent command
     docker_cmd_list = [
-        get_docker_binary(nvidia_runtime),
+        "docker",
         'run',
+        "--gpus all" if nvidia_runtime else "",
         "--cap-add",
         "SYS_PTRACE", # Required by ASAN
         '--rm',
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
new file mode 100644
index 000000000000..d5933c3bb628
--- /dev/null
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -0,0 +1,166 @@
+# -*- mode: dockerfile -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Dockerfile for Ubuntu based builds.
+#
+# See docker-compose.yml for supported BASE_IMAGE ARGs and targets.
+
+####################################################################################################
+# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:18.04
+# nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 etc).
+# On top of BASE_IMAGE we install all dependencies shared by all MXNet build
+# environments into a "base" target. At the end of this file, we can specialize
+# "base" for specific usecases. The target built by docker can be selected via
+# "--target" option or docker-compose.yml
+####################################################################################################
+ARG BASE_IMAGE
+FROM $BASE_IMAGE AS base
+
+WORKDIR /work/deps
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y wget software-properties-common && \
+    wget -qO - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+    wget -qO - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add - && \
+    apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" &&  \
+    apt-add-repository "deb https://apt.repos.intel.com/mkl all main" &&  \
+    apt-get update && \
+    apt-get install -y \
+        ## Utilities
+        curl \
+        unzip \
+        pandoc \
+        ## Development tools
+        build-essential \
+        ninja-build \
+        git \
+        protobuf-compiler \
+        libprotobuf-dev \
+        clang-6.0 \
+        clang-tidy-6.0 \
+        python-yaml \
+        clang-10 \
+        g++ \
+        g++-8 \
+        intel-mkl-2020.0-088 \
+        ## Dependencies
+        libgomp1 \
+        libturbojpeg0-dev \
+        libopenblas-dev \
+        libcurl4-openssl-dev \
+        libatlas-base-dev \
+        libzmq3-dev \
+        liblapack-dev \
+        libopencv-dev \
+        # Caffe
+        caffe-cpu \
+        libcaffe-cpu-dev \
+        ## Frontend languages
+        # Python
+        python3 \
+        python3-pip \
+        python3-nose \
+        python3-nose-timer \
+        # Scala
+        openjdk-8-jdk \
+        openjdk-8-jre \
+        maven \
+        scala \
+        # Clojure
+        clojure \
+        leiningen \
+        # R
+        r-base-core \
+        r-cran-devtools \
+        libcairo2-dev \
+        libxml2-dev \
+        ## Documentation
+        doxygen \
+        pandoc \
+        ## Build-dependencies for ccache 3.7.9
+        gperf \
+        libb2-dev \
+        libzstd-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# ccache 3.7.9 has fixes for caching nvcc outputs
+RUN cd /usr/local/src && \
+    git clone --recursive https://github.com/ccache/ccache.git && \
+    cd ccache && \
+    git checkout v3.7.9 && \
+    ./autogen.sh && \
+    ./configure --disable-man && \
+    make -j$(nproc) && \
+    make install && \
+    cd /usr/local/src && \
+    rm -rf ccache
+
+# Python & cmake
+COPY install/requirements /work/
+RUN python3 -m pip install cmake==3.16.6 && \
+    python3 -m pip install -r /work/requirements
+
+# Only OpenJDK 8 supported at this time..
+RUN update-java-alternatives -s java-1.8.0-openjdk-amd64
+
+# julia not available on 18.04
+COPY install/ubuntu_julia.sh /work/
+RUN /work/ubuntu_julia.sh
+
+# PDL::CCS missing on 18.04
+COPY install/ubuntu_perl.sh /work/
+RUN /work/ubuntu_perl.sh
+
+# MXNetJS nightly needs emscripten for wasm
+COPY install/ubuntu_emscripten.sh /work/
+RUN /work/ubuntu_emscripten.sh
+
+ARG USER_ID=0
+COPY install/docker_filepermissions.sh /work/
+RUN /work/docker_filepermissions.sh
+
+ENV PYTHONPATH=./python/
+WORKDIR /work/mxnet
+
+COPY runtime_functions.sh /work/
+
+####################################################################################################
+# Specialize base image to install more gpu specific dependencies.
+# The target built by docker can be selected via "--target" option or docker-compose.yml
+####################################################################################################
+FROM base as gpu
+# Install Thrust 1.9.8 to be shipped with Cuda 11.
+# Fixes https://github.com/thrust/thrust/issues/1072 for Clang 10
+# This file can be deleted when using Cuda 11 on CI
+RUN cd /usr/local && \
+    git clone https://github.com/thrust/thrust.git && \
+    cd thrust && \
+    git checkout 1.9.8
+
+
+FROM gpu as gpuwithcudaruntimelibs
+# Special case because the CPP-Package requires the CUDA runtime libs
+# and not only stubs (which are provided by the base image)
+# This prevents usage of this image for actual GPU tests with Docker.
+# This is a bug in CPP-Package and should be fixed.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt install -y  --no-install-recommends \
+        cuda-10-1 && \
+    rm -rf /var/lib/apt/lists/*
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
new file mode 100644
index 000000000000..ca00f9ff86bf
--- /dev/null
+++ b/ci/docker/docker-compose.yml
@@ -0,0 +1,208 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# We use the cache_from feature introduced in file form version 3.4 (released 2017-11-01)
+version: "3.4"
+
+# For simplicity, only the centos7_cpu is commented. But the comments apply to
+# all other services as well.
+services:
+  ###################################################################################################
+  # Dockerfile.build.centos7 based images used for building on CentOS7. On
+  # CentOS7, we respectively test the oldest supported toolchain and dependency
+  # versions
+  ###################################################################################################
+  centos7_cpu:
+    # The resulting image will be named build.centos7_cpu:latest and will be
+    # pushed to the dockerhub user specified in the environment variable
+    # ${DOCKER_CACHE_REGISTRY} (typicall "mxnetci") under this name
+    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.centos7
+      # Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest"  
+      target: base
+      args:
+        # BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7
+        BASE_IMAGE: centos:7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
+  centos7_gpu_cu92:
+    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.centos7
+      target: gpu
+      args:
+        BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
+  centos7_gpu_cu100:
+    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.centos7
+      target: gpu
+      args:
+        BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
+  centos7_gpu_cu101:
+    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.centos7
+      target: gpu
+      args:
+        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
+  centos7_gpu_cu102:
+    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.centos7
+      target: gpu
+      args:
+        BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-centos7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
+  ###################################################################################################
+  # Dockerfile.build.ubuntu based images. On Ubuntu we test more recent
+  # toolchain and dependency versions compared to CentOS7. We attempt to update
+  # the Ubuntu base image every 6 months, following the Ubuntu release cycle,
+  # and testing the dependencies in their version provided by the respective
+  # Ubuntu release.
+  ###################################################################################################
+  ubuntu_cpu:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.ubuntu
+      target: base
+      args:
+        BASE_IMAGE: ubuntu:18.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
+  ubuntu_gpu_cu101:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.ubuntu
+      target: gpu
+      args:
+        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
+  ubuntu_build_cuda:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.ubuntu
+      target: gpuwithcudaruntimelibs
+      args:
+        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
+  ###################################################################################################
+  # Dockerfile.build.android based images used for testing cross-compilation for plain ARM
+  ###################################################################################################
+  armv6:
+    image: ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.arm
+      target: armv6
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
+  armv7:
+    image: ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.arm
+      target: armv7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
+  armv8:
+    image: ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.arm
+      target: armv8
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
+  ###################################################################################################
+  # Dockerfile.test.arm based images for testing ARM artefacts via QEMU
+  ###################################################################################################
+  test.armv7:
+    image: ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.test.arm
+      args:
+        BASE_IMAGE: arm32v7/ubuntu:20.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
+  test.armv8:
+    image: ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.test.arm
+      args:
+        BASE_IMAGE: arm64v8/ubuntu:20.04
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
+  ###################################################################################################
+  # Dockerfile.build.android based images used for testing cross-compilation for Android
+  ###################################################################################################
+  android_armv7:
+    image: ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.android
+      target: armv7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
+  android_armv8:
+    image: ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.build.android
+      target: armv8
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
+  ###################################################################################################
+  # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
+  ###################################################################################################
+  publish.test.centos7_cpu:
+    image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.publish.test.centos7
+      args:
+        BASE_IMAGE: centos:7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
+  publish.test.centos7_gpu:
+    image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.publish.test.centos7
+      args:
+        BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
+      cache_from:
+        - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 3f5fb2503b56..9fdeda0d4529 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -146,7 +146,7 @@ def compile_unix_int64_cpu() {
 
 def compile_unix_int64_gpu() {
     return ['GPU: USE_INT64_TENSOR_SIZE': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
@@ -751,7 +751,7 @@ def test_unix_python3_mkl_cpu() {
 
 def test_unix_python3_gpu() {
     return ['Python3: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init('gpu', mx_lib_cython)
@@ -867,7 +867,7 @@ def test_unix_python3_mkldnn_mkl_cpu() {
 
 def test_unix_python3_mkldnn_gpu() {
     return ['Python3: MKLDNN-GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib)
@@ -883,7 +883,7 @@ def test_unix_python3_mkldnn_gpu() {
 
 def test_unix_python3_mkldnn_nocudnn_gpu() {
     return ['Python3: MKLDNN-GPU-NOCUDNN': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
@@ -917,7 +917,7 @@ def test_unix_python3_tensorrt_gpu() {
 
 def test_unix_python3_integration_gpu() {
     return ['Python Integration GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -929,24 +929,9 @@ def test_unix_python3_integration_gpu() {
     }]
 }
 
-def test_unix_caffe_gpu() {
-    return ['Caffe GPU': {
-        node(NODE_LINUX_GPU) {
-            ws('workspace/it-caffe') {
-            timeout(time: max_time, unit: 'MINUTES') {
-                utils.init_git()
-                utils.unpack_lib('gpu', mx_lib)
-                utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_caffe', true)
-                utils.publish_test_coverage()
-            }
-            }
-        }
-    }]
-}
-
 def test_unix_cpp_package_gpu() {
-    return ['cpp-package GPU': {
-      node(NODE_LINUX_GPU) {
+    return ['cpp-package GPU Makefile': {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib_cpp_examples)
@@ -959,8 +944,8 @@ def test_unix_cpp_package_gpu() {
 }
 
 def test_unix_capi_cpp_package() {
-    return ['capi-cpp-package GPU': {
-      node(NODE_LINUX_GPU) {
+    return ['capi-cpp-package GPU Makefile': {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu_mkldnn_cpp_test', mx_lib_cpp_capi)
@@ -1001,8 +986,8 @@ def test_unix_scala_mkldnn_cpu(){
 }
 
 def test_unix_scala_gpu() {
-    return ['Scala: GPU': {
-      node(NODE_LINUX_GPU) {
+    return ['Scala: GPU Makefile': {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -1085,7 +1070,7 @@ def test_unix_perl_cpu() {
 
 def test_unix_cpp_gpu() {
     return ['Cpp: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('cmake_gpu', mx_cmake_lib)
@@ -1126,8 +1111,8 @@ def test_unix_cpp_cpu() {
 }
 
 def test_unix_perl_gpu() {
-    return ['Perl: GPU': {
-      node(NODE_LINUX_GPU) {
+    return ['Perl: GPU Makefile': {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -1141,7 +1126,7 @@ def test_unix_perl_gpu() {
 
 def test_unix_r_gpu() {
     return ['R: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
@@ -1209,7 +1194,7 @@ def test_unix_distributed_kvstore_cpu() {
 
 def test_unix_distributed_kvstore_gpu() {
     return ['dist-kvstore tests GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init('gpu', mx_lib)
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index f8c28d5d1994..a9feae158311 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -29,7 +29,7 @@ node('utility') {
   utils = load('ci/Jenkinsfile_utils.groovy')
   custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
 }
-utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3')
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4')
 
 utils.main_wrapper(
 core_logic: {

From c1b54fbb4552e8441f556b2e70bd5f3199dd521b Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Fri, 24 Jul 2020 12:22:07 -0700
Subject: [PATCH 2/9] remove docker compose files

---
 ci/docker/Dockerfile.build.ubuntu | 166 ------------------------
 ci/docker/docker-compose.yml      | 208 ------------------------------
 2 files changed, 374 deletions(-)
 delete mode 100644 ci/docker/Dockerfile.build.ubuntu
 delete mode 100644 ci/docker/docker-compose.yml

diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
deleted file mode 100644
index d5933c3bb628..000000000000
--- a/ci/docker/Dockerfile.build.ubuntu
+++ /dev/null
@@ -1,166 +0,0 @@
-# -*- mode: dockerfile -*-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# Dockerfile for Ubuntu based builds.
-#
-# See docker-compose.yml for supported BASE_IMAGE ARGs and targets.
-
-####################################################################################################
-# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:18.04
-# nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 etc).
-# On top of BASE_IMAGE we install all dependencies shared by all MXNet build
-# environments into a "base" target. At the end of this file, we can specialize
-# "base" for specific usecases. The target built by docker can be selected via
-# "--target" option or docker-compose.yml
-####################################################################################################
-ARG BASE_IMAGE
-FROM $BASE_IMAGE AS base
-
-WORKDIR /work/deps
-
-RUN export DEBIAN_FRONTEND=noninteractive && \
-    apt-get update && \
-    apt-get install -y wget software-properties-common && \
-    wget -qO - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
-    wget -qO - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add - && \
-    apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" &&  \
-    apt-add-repository "deb https://apt.repos.intel.com/mkl all main" &&  \
-    apt-get update && \
-    apt-get install -y \
-        ## Utilities
-        curl \
-        unzip \
-        pandoc \
-        ## Development tools
-        build-essential \
-        ninja-build \
-        git \
-        protobuf-compiler \
-        libprotobuf-dev \
-        clang-6.0 \
-        clang-tidy-6.0 \
-        python-yaml \
-        clang-10 \
-        g++ \
-        g++-8 \
-        intel-mkl-2020.0-088 \
-        ## Dependencies
-        libgomp1 \
-        libturbojpeg0-dev \
-        libopenblas-dev \
-        libcurl4-openssl-dev \
-        libatlas-base-dev \
-        libzmq3-dev \
-        liblapack-dev \
-        libopencv-dev \
-        # Caffe
-        caffe-cpu \
-        libcaffe-cpu-dev \
-        ## Frontend languages
-        # Python
-        python3 \
-        python3-pip \
-        python3-nose \
-        python3-nose-timer \
-        # Scala
-        openjdk-8-jdk \
-        openjdk-8-jre \
-        maven \
-        scala \
-        # Clojure
-        clojure \
-        leiningen \
-        # R
-        r-base-core \
-        r-cran-devtools \
-        libcairo2-dev \
-        libxml2-dev \
-        ## Documentation
-        doxygen \
-        pandoc \
-        ## Build-dependencies for ccache 3.7.9
-        gperf \
-        libb2-dev \
-        libzstd-dev && \
-    rm -rf /var/lib/apt/lists/*
-
-# ccache 3.7.9 has fixes for caching nvcc outputs
-RUN cd /usr/local/src && \
-    git clone --recursive https://github.com/ccache/ccache.git && \
-    cd ccache && \
-    git checkout v3.7.9 && \
-    ./autogen.sh && \
-    ./configure --disable-man && \
-    make -j$(nproc) && \
-    make install && \
-    cd /usr/local/src && \
-    rm -rf ccache
-
-# Python & cmake
-COPY install/requirements /work/
-RUN python3 -m pip install cmake==3.16.6 && \
-    python3 -m pip install -r /work/requirements
-
-# Only OpenJDK 8 supported at this time..
-RUN update-java-alternatives -s java-1.8.0-openjdk-amd64
-
-# julia not available on 18.04
-COPY install/ubuntu_julia.sh /work/
-RUN /work/ubuntu_julia.sh
-
-# PDL::CCS missing on 18.04
-COPY install/ubuntu_perl.sh /work/
-RUN /work/ubuntu_perl.sh
-
-# MXNetJS nightly needs emscripten for wasm
-COPY install/ubuntu_emscripten.sh /work/
-RUN /work/ubuntu_emscripten.sh
-
-ARG USER_ID=0
-COPY install/docker_filepermissions.sh /work/
-RUN /work/docker_filepermissions.sh
-
-ENV PYTHONPATH=./python/
-WORKDIR /work/mxnet
-
-COPY runtime_functions.sh /work/
-
-####################################################################################################
-# Specialize base image to install more gpu specific dependencies.
-# The target built by docker can be selected via "--target" option or docker-compose.yml
-####################################################################################################
-FROM base as gpu
-# Install Thrust 1.9.8 to be shipped with Cuda 11.
-# Fixes https://github.com/thrust/thrust/issues/1072 for Clang 10
-# This file can be deleted when using Cuda 11 on CI
-RUN cd /usr/local && \
-    git clone https://github.com/thrust/thrust.git && \
-    cd thrust && \
-    git checkout 1.9.8
-
-
-FROM gpu as gpuwithcudaruntimelibs
-# Special case because the CPP-Package requires the CUDA runtime libs
-# and not only stubs (which are provided by the base image)
-# This prevents usage of this image for actual GPU tests with Docker.
-# This is a bug in CPP-Package and should be fixed.
-RUN export DEBIAN_FRONTEND=noninteractive && \
-    apt-get update && \
-    apt install -y  --no-install-recommends \
-        cuda-10-1 && \
-    rm -rf /var/lib/apt/lists/*
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
deleted file mode 100644
index ca00f9ff86bf..000000000000
--- a/ci/docker/docker-compose.yml
+++ /dev/null
@@ -1,208 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# We use the cache_from feature introduced in file form version 3.4 (released 2017-11-01)
-version: "3.4"
-
-# For simplicity, only the centos7_cpu is commented. But the comments apply to
-# all other services as well.
-services:
-  ###################################################################################################
-  # Dockerfile.build.centos7 based images used for building on CentOS7. On
-  # CentOS7, we respectively test the oldest supported toolchain and dependency
-  # versions
-  ###################################################################################################
-  centos7_cpu:
-    # The resulting image will be named build.centos7_cpu:latest and will be
-    # pushed to the dockerhub user specified in the environment variable
-    # ${DOCKER_CACHE_REGISTRY} (typicall "mxnetci") under this name
-    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.centos7
-      # Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest"  
-      target: base
-      args:
-        # BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7
-        BASE_IMAGE: centos:7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest
-  centos7_gpu_cu92:
-    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.centos7
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu92:latest
-  centos7_gpu_cu100:
-    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.centos7
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest
-  centos7_gpu_cu101:
-    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.centos7
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest
-  centos7_gpu_cu102:
-    image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.centos7
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-centos7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest
-  ###################################################################################################
-  # Dockerfile.build.ubuntu based images. On Ubuntu we test more recent
-  # toolchain and dependency versions compared to CentOS7. We attempt to update
-  # the Ubuntu base image every 6 months, following the Ubuntu release cycle,
-  # and testing the dependencies in their version provided by the respective
-  # Ubuntu release.
-  ###################################################################################################
-  ubuntu_cpu:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: base
-      args:
-        BASE_IMAGE: ubuntu:18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
-  ubuntu_gpu_cu101:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
-  ubuntu_build_cuda:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcudaruntimelibs
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
-  ###################################################################################################
-  # Dockerfile.build.android based images used for testing cross-compilation for plain ARM
-  ###################################################################################################
-  armv6:
-    image: ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.arm
-      target: armv6
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.armv6:latest
-  armv7:
-    image: ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.arm
-      target: armv7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.armv7:latest
-  armv8:
-    image: ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.arm
-      target: armv8
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.armv8:latest
-  ###################################################################################################
-  # Dockerfile.test.arm based images for testing ARM artefacts via QEMU
-  ###################################################################################################
-  test.armv7:
-    image: ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.test.arm
-      args:
-        BASE_IMAGE: arm32v7/ubuntu:20.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/test.armv7:latest
-  test.armv8:
-    image: ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.test.arm
-      args:
-        BASE_IMAGE: arm64v8/ubuntu:20.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/test.armv8:latest
-  ###################################################################################################
-  # Dockerfile.build.android based images used for testing cross-compilation for Android
-  ###################################################################################################
-  android_armv7:
-    image: ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.android
-      target: armv7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.android_armv7:latest
-  android_armv8:
-    image: ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.android
-      target: armv8
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.android_armv8:latest
-  ###################################################################################################
-  # Dockerfile.publish.test based images used for testing binary artifacts on minimal systems.
-  ###################################################################################################
-  publish.test.centos7_cpu:
-    image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.publish.test.centos7
-      args:
-        BASE_IMAGE: centos:7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_cpu:latest
-  publish.test.centos7_gpu:
-    image: ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.publish.test.centos7
-      args:
-        BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/publish.test.centos7_gpu:latest

From 08674b438bc9fe68680d608f8c32720961f2399c Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Fri, 24 Jul 2020 12:25:11 -0700
Subject: [PATCH 3/9] add back the caffe test since caffe is deprecated for
 mx2.0 and not 1.x

---
 ci/jenkins/Jenkins_steps.groovy | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 9fdeda0d4529..b50efe9be5d3 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -929,6 +929,21 @@ def test_unix_python3_integration_gpu() {
     }]
 }
 
+def test_unix_caffe_gpu() {
+    return ['Caffe GPU': {
+        node(NODE_LINUX_GPU) {
+            ws('workspace/it-caffe') {
+            timeout(time: max_time, unit: 'MINUTES') {
+                utils.init_git()
+                utils.unpack_lib('gpu', mx_lib)
+                utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_caffe', true)
+                utils.publish_test_coverage()
+            }
+            }
+        }
+    }]
+}
+
 def test_unix_cpp_package_gpu() {
     return ['cpp-package GPU Makefile': {
       node(NODE_LINUX_GPU_G4) {

From c6b32ee0936b48883703d51366a7821614fd4071 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Mon, 27 Jul 2020 12:19:22 -0700
Subject: [PATCH 4/9] drop nvidia-docker requirement since docker19.0 supports
 it by default

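Docker 19.03+ supports GPUs natively via `docker run --gpus`, so the separate
nvidia-docker wrapper binary is no longer needed to build or run CI containers.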
---
 ci/build.py | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 3224dbbce957..f998cf54739d 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -66,22 +66,18 @@ def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str:
     return os.path.join(path, "Dockerfile.{0}".format(platform))
 
 
-def get_docker_binary(use_nvidia_docker: bool) -> str:
-    return "nvidia-docker" if use_nvidia_docker else "docker"
-
-
-def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool) -> str:
+def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
+                 cache_intermediate: bool = False) -> str:
     """
     Build a container for the given platform
     :param platform: Platform
-    :param docker_binary: docker binary to use (docker/nvidia-docker)
     :param registry: Dockerhub registry name
     :param num_retries: Number of retries to build the docker image
     :param no_cache: pass no-cache to docker to rebuild the images
     :return: Id of the top level image
     """
     tag = get_docker_tag(platform=platform, registry=registry)
-    logging.info("Building docker container tagged '%s' with %s", tag, docker_binary)
+    logging.info("Building docker container tagged '%s'", tag)
     #
     # We add a user with the same group as the executing non-root user so files created in the
     # container match permissions of the local user. Same for the group.
@@ -98,7 +94,7 @@ def build_docker(platform: str, docker_binary: str, registry: str, num_retries:
     #
     # This doesn't work with multi head docker files.
     #
-    cmd = [docker_binary, "build",
+    cmd = ["docker", "build",
            "-f", get_dockerfile(platform),
            "--build-arg", "USER_ID={}".format(os.getuid()),
            "--build-arg", "GROUP_ID={}".format(os.getgid())]
@@ -116,19 +112,19 @@ def run_cmd():
     run_cmd()
     # Get image id by reading the tag. It's guaranteed (except race condition) that the tag exists. Otherwise, the
     # check_call would have failed
-    image_id = _get_local_image_id(docker_binary=docker_binary, docker_tag=tag)
+    image_id = _get_local_image_id(docker_tag=tag)
     if not image_id:
         raise FileNotFoundError('Unable to find docker image id matching with {}'.format(tag))
     return image_id
 
 
-def _get_local_image_id(docker_binary, docker_tag):
+def _get_local_image_id(docker_tag):
     """
     Get the image id of the local docker layer with the passed tag
     :param docker_tag: docker tag
     :return: Image id as string or None if tag does not exist
     """
-    cmd = [docker_binary, "images", "-q", docker_tag]
+    cmd = ["docker", "images", "-q", docker_tag]
     image_id_b = check_output(cmd)
     image_id = image_id_b.decode('utf-8').strip()
     if not image_id:
@@ -347,7 +343,6 @@ def main() -> int:
     args = parser.parse_args()
 
     command = list(chain(*args.command))
-    docker_binary = get_docker_binary(args.nvidiadocker)
     docker_client = SafeDockerClient()
 
     environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e]))
@@ -361,7 +356,7 @@ def main() -> int:
         if args.docker_registry:
             load_docker_cache(tag=tag, docker_registry=args.docker_registry)
         if not args.run_only:
-            build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry,
+            build_docker(platform=platform, registry=args.docker_registry,
                          num_retries=args.docker_build_retries, no_cache=args.no_cache)
         else:
             logging.info("Skipping docker build step.")
@@ -404,7 +399,7 @@ def main() -> int:
         for platform in platforms:
             tag = get_docker_tag(platform=platform, registry=args.docker_registry)
             load_docker_cache(tag=tag, docker_registry=args.docker_registry)
-            build_docker(platform, docker_binary=docker_binary, registry=args.docker_registry,
+            build_docker(platform, registry=args.docker_registry,
                          num_retries=args.docker_build_retries, no_cache=args.no_cache)
             if args.build_only:
                 continue

From ddad335174e41e2d1f318d8817ffeab9bf82ef7e Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 16 Aug 2020 10:02:08 -0700
Subject: [PATCH 5/9] remove compat from dockerfile

---
 ci/docker/Dockerfile.build.ubuntu_gpu_cu101 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101
index 717a5aa47f0e..a17261bfa820 100644
--- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101
@@ -79,4 +79,3 @@ RUN /work/ubuntu_adduser.sh
 COPY runtime_functions.sh /work/
 
 WORKDIR /work/mxnet
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat

From 90d3ace759eeb46c19e04e6834d84f494c6c09fc Mon Sep 17 00:00:00 2001
From: Carin Meier <cmeier@gigasquidsoftware.com>
Date: Sun, 16 Aug 2020 12:53:47 -0400
Subject: [PATCH 6/9] Cherry-pick #18635 to v1.7.x (#18935)

* Remove mention of nightly in pypi (#18635)

* update bert dev.tsv link

Co-authored-by: Sheng Zha <szha@users.noreply.github.com>
---
 contrib/clojure-package/examples/bert/get_bert_data.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/clojure-package/examples/bert/get_bert_data.sh b/contrib/clojure-package/examples/bert/get_bert_data.sh
index 10ed8e9a1f8e..e640857d1193 100755
--- a/contrib/clojure-package/examples/bert/get_bert_data.sh
+++ b/contrib/clojure-package/examples/bert/get_bert_data.sh
@@ -28,5 +28,5 @@ if [ ! -d "$data_path" ]; then
   curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_qa-symbol.json -o $data_path/static_bert_qa-symbol.json
   curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_base_net-symbol.json -o $data_path/static_bert_base_net-symbol.json
   curl https://s3.us-east-2.amazonaws.com/mxnet-scala/scala-example-ci/BertQA/static_bert_base_net-0000.params -o $data_path/static_bert_base_net-0000.params
-  curl https://raw.githubusercontent.com/dmlc/gluon-nlp/master/docs/examples/sentence_embedding/dev.tsv -o $data_path/dev.tsv
+  curl https://raw.githubusercontent.com/dmlc/gluon-nlp/d75185ec7eb1eb082ee92992be8677666aaf7ec7/docs/examples/sentence_embedding/dev.tsv -o $data_path/dev.tsv
 fi

From 3fb24fa8f28c356a829b69acd7b7bb6ef3c9e2d7 Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 16 Aug 2020 12:04:38 -0700
Subject: [PATCH 7/9] disable tvm in CI functions that rely on libcuda compat

---
 ci/docker/runtime_functions.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 4b544e4b63ef..ff815baa3c0d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -874,7 +874,7 @@ build_ubuntu_gpu_cmake_mkldnn() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=1                            \
         -DUSE_CUDNN=1                           \
-        -DUSE_TVM_OP=1                          \
+        -DUSE_TVM_OP=0                          \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKLML_MKL=1                       \
         -DCMAKE_BUILD_TYPE=Release              \
@@ -916,7 +916,7 @@ build_ubuntu_gpu_cmake_no_rtc() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                         \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
@@ -980,7 +980,7 @@ build_ubuntu_gpu_large_tensor() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                         \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \

From 4ef8db76e7d7db4a9efd97ea06452ffd859994ac Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Sun, 16 Aug 2020 18:20:17 -0700
Subject: [PATCH 8/9] tvm off for ubuntu_gpu_cmake build

---
 ci/docker/runtime_functions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index ff815baa3c0d..65d912092b1b 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -893,7 +893,7 @@ build_ubuntu_gpu_cmake() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                         \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \

From 232538f4f4f704fd5d01b97b66655bab5393942b Mon Sep 17 00:00:00 2001
From: ChaiBapchya <chai.bapat@gmail.com>
Date: Mon, 17 Aug 2020 19:31:55 -0700
Subject: [PATCH 9/9] drop tvm from all unix-gpu builds

---
 ci/docker/runtime_functions.sh  | 49 ++-------------------------------
 ci/jenkins/Jenkins_steps.groovy | 43 -----------------------------
 ci/jenkins/Jenkinsfile_unix_gpu |  3 --
 3 files changed, 3 insertions(+), 92 deletions(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 65d912092b1b..4523e1f017f5 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -767,7 +767,7 @@ build_ubuntu_gpu_mkldnn() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=1                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1                      \
         -j$(nproc)
@@ -784,7 +784,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=0                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1                      \
         -j$(nproc)
@@ -799,7 +799,7 @@ build_ubuntu_gpu_cuda101_cudnn7() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=1                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         USE_CPP_PACKAGE=1                         \
         USE_DIST_KVSTORE=1                        \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
@@ -827,26 +827,6 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
     make cython PYTHON=python3
 }
 
-build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() {
-    set -ex
-    build_ccache_wrappers
-    make \
-        DEV=1                                     \
-        USE_BLAS=openblas                         \
-        USE_MKLDNN=0                              \
-        USE_CUDA=1                                \
-        USE_CUDA_PATH=/usr/local/cuda             \
-        USE_CUDNN=1                               \
-        USE_TVM_OP=0                              \
-        USE_CPP_PACKAGE=1                         \
-        USE_DIST_KVSTORE=1                        \
-        CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
-        USE_SIGNAL_HANDLER=1                      \
-        -j$(nproc)
-
-    make cython PYTHON=python3
-}
-
 build_ubuntu_amalgamation() {
     set -ex
     # Amalgamation can not be run with -j nproc
@@ -932,29 +912,6 @@ build_ubuntu_gpu_cmake_no_rtc() {
     ninja
 }
 
-build_ubuntu_gpu_cmake_no_tvm_op() {
-    set -ex
-    cd /work/build
-    build_ccache_wrappers
-    cmake \
-        -DUSE_SIGNAL_HANDLER=ON                 \
-        -DUSE_CUDA=ON                           \
-        -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=OFF                        \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
-        -DUSE_MKL_IF_AVAILABLE=OFF              \
-        -DUSE_MKLML_MKL=OFF                     \
-        -DUSE_MKLDNN=OFF                        \
-        -DUSE_DIST_KVSTORE=ON                   \
-        -DCMAKE_BUILD_TYPE=Release              \
-        -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-        -DBUILD_CYTHON_MODULES=1                \
-        -G Ninja                                \
-        /work/mxnet
-
-    ninja
-}
-
 build_ubuntu_cpu_large_tensor() {
     set -ex
     cd /work/build
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 2831d6d044e6..c4fd96e65ac0 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -253,20 +253,6 @@ def compile_unix_full_gpu_mkldnn_cpp_test() {
     }]
 }
 
-def compile_unix_full_gpu_no_tvm_op() {
-    return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': {
-      node(NODE_LINUX_CPU) {
-        ws('workspace/build-gpu-no-tvm-op') {
-          timeout(time: max_time, unit: 'MINUTES') {
-            utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
-            utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-          }
-        }
-      }
-    }]
-}
-
 def compile_unix_cmake_mkldnn_gpu() {
     return ['GPU: CMake MKLDNN': {
       node(NODE_LINUX_CPU) {
@@ -295,19 +281,6 @@ def compile_unix_cmake_gpu() {
     }]
 }
 
-def compile_unix_cmake_gpu_no_tvm_op() {
-    return ['GPU: CMake TVM_OP OFF': {
-      node(NODE_LINUX_CPU) {
-        ws('workspace/build-cmake-gpu-no-tvm-op') {
-          timeout(time: max_time, unit: 'MINUTES') {
-            utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
-          }
-        }
-      }
-    }]
-}
-
 def compile_unix_cmake_gpu_no_rtc() {
     return ['GPU: CMake CUDA RTC OFF': {
         node(NODE_LINUX_CPU) {
@@ -764,22 +737,6 @@ def test_unix_python3_gpu() {
     }]
 }
 
-def test_unix_python3_gpu_no_tvm_op() {
-    return ['Python3: GPU TVM_OP OFF': {
-      node(NODE_LINUX_GPU) {
-        ws('workspace/ut-python3-gpu-no-tvm-op') {
-          try {
-            utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
-            utils.publish_test_coverage()
-          } finally {
-            utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml')
-          }
-        }
-      }
-    }]
-}
-
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
       node(NODE_LINUX_GPU_P3) {
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index a9feae158311..5e26a9f41380 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -41,8 +41,6 @@ core_logic: {
     custom_steps.compile_unix_cmake_gpu(),
     custom_steps.compile_unix_tensorrt_gpu(),
     custom_steps.compile_unix_int64_gpu(),
-    custom_steps.compile_unix_full_gpu_no_tvm_op(),
-    custom_steps.compile_unix_cmake_gpu_no_tvm_op(),
     custom_steps.compile_unix_cmake_gpu_no_rtc(),
     custom_steps.compile_unix_full_gpu_mkldnn_cpp_test()
   ])
@@ -63,7 +61,6 @@ core_logic: {
     custom_steps.test_unix_distributed_kvstore_gpu(),
     custom_steps.test_static_python_gpu(),
     custom_steps.test_static_python_gpu_cmake(),
-    custom_steps.test_unix_python3_gpu_no_tvm_op(),
     custom_steps.test_unix_capi_cpp_package(),
 
     // Disabled due to: https://github.com/apache/incubator-mxnet/issues/11407