From 16faca90168661e1c538fbe1c098f7faed98c3af Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Mon, 14 Sep 2020 17:23:40 -0700
Subject: [PATCH 01/12] Add new docker containers for Cuda 11.0 and libcudnn8.

---
 .../Dockerfile.build.ubuntu_build_cuda110   | 66 +++++++++++++++
 ci/docker/Dockerfile.build.ubuntu_gpu_cu110 | 82 +++++++++++++++++++
 ci/docker/install/ubuntu_cudnn.sh           | 32 +++++---
 3 files changed, 169 insertions(+), 11 deletions(-)
 create mode 100644 ci/docker/Dockerfile.build.ubuntu_build_cuda110
 create mode 100644 ci/docker/Dockerfile.build.ubuntu_gpu_cu110

diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda110 b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
new file mode 100644
index 000000000000..9db96d097531
--- /dev/null
+++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
@@ -0,0 +1,66 @@
+# -*- mode: dockerfile -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Dockerfile to build MXNet on Ubuntu 16.04 for GPU but on
+# a CPU-only instance. This restriction is caused by the CPP-
+# package generation, requiring the actual CUDA library to be
+# present
+
+FROM nvidia/cuda:11.0-devel-ubuntu16.04
+
+WORKDIR /work/deps
+
+COPY install/ubuntu_core.sh /work/
+RUN /work/ubuntu_core.sh
+COPY install/deb_ubuntu_ccache.sh /work/
+RUN /work/deb_ubuntu_ccache.sh
+COPY install/ubuntu_python.sh /work/
+COPY install/requirements /work/
+RUN /work/ubuntu_python.sh
+COPY install/ubuntu_scala.sh /work/
+COPY install/sbt.gpg /work/
+RUN /work/ubuntu_scala.sh
+COPY install/ubuntu_r.sh /work/
+COPY install/r.gpg /work/
+RUN /work/ubuntu_r.sh
+COPY install/ubuntu_perl.sh /work/
+RUN /work/ubuntu_perl.sh
+COPY install/ubuntu_clang.sh /work/
+RUN /work/ubuntu_clang.sh
+COPY install/ubuntu_binutils.sh /work/
+RUN /work/ubuntu_binutils.sh
+
+ENV CUDA_VERSION=11.0.221
+ENV CUDNN_VERSION=8.0.3.33
+COPY install/ubuntu_cudnn.sh /work/
+RUN /work/ubuntu_cudnn.sh
+
+# Special case because the CPP-Package requires the CUDA runtime libs
+# and not only stubs (which are provided by the base image)
+COPY install/ubuntu_nvidia.sh /work/
+RUN /work/ubuntu_nvidia.sh
+
+# Keep this at the end since this command is not cachable
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
+COPY runtime_functions.sh /work/
+
+WORKDIR /work/mxnet
diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
new file mode 100644
index 000000000000..2f0644050d2b
--- /dev/null
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
@@ -0,0 +1,82 @@
+# -*- mode: dockerfile -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Dockerfile to run MXNet on Ubuntu 16.04 for GPU
+
+FROM nvidia/cuda:11.0-devel-ubuntu16.04
+
+WORKDIR /work/deps
+
+COPY install/ubuntu_core.sh /work/
+RUN /work/ubuntu_core.sh
+
+COPY install/deb_ubuntu_ccache.sh /work/
+RUN /work/deb_ubuntu_ccache.sh
+
+COPY install/ubuntu_python.sh /work/
+COPY install/requirements /work/
+RUN /work/ubuntu_python.sh
+
+COPY install/ubuntu_scala.sh /work/
+COPY install/sbt.gpg /work/
+RUN /work/ubuntu_scala.sh
+
+COPY install/ubuntu_r.sh /work/
+COPY install/r.gpg /work/
+RUN /work/ubuntu_r.sh
+
+COPY install/ubuntu_perl.sh /work/
+RUN /work/ubuntu_perl.sh
+
+COPY install/ubuntu_clang.sh /work/
+RUN /work/ubuntu_clang.sh
+
+COPY install/ubuntu_tvm.sh /work/
+RUN /work/ubuntu_tvm.sh
+
+COPY install/ubuntu_llvm.sh /work/
+RUN /work/ubuntu_llvm.sh
+
+COPY install/ubuntu_caffe.sh /work/
+RUN /work/ubuntu_caffe.sh
+
+COPY install/ubuntu_onnx.sh /work/
+RUN /work/ubuntu_onnx.sh
+
+COPY install/ubuntu_docs.sh /work/
+COPY install/requirements /work/
+RUN /work/ubuntu_docs.sh
+
+COPY install/ubuntu_tutorials.sh /work/
+RUN /work/ubuntu_tutorials.sh
+
+ENV CUDA_VERSION=11.0.221
+ENV CUDNN_VERSION=8.0.3.33
+COPY install/ubuntu_cudnn.sh /work/
+RUN /work/ubuntu_cudnn.sh
+
+# Always last
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
+COPY runtime_functions.sh /work/
+
+WORKDIR /work/mxnet
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
diff --git a/ci/docker/install/ubuntu_cudnn.sh b/ci/docker/install/ubuntu_cudnn.sh
index eaf50447305c..1cc0b722bfc7 100755
--- a/ci/docker/install/ubuntu_cudnn.sh
+++ b/ci/docker/install/ubuntu_cudnn.sh
@@ -32,25 +32,35 @@ fi
 apt-get update || true
 
 case ${CUDA_VERSION} in
+    11\.0*)
+        export libcudnn_package="libcudnn8"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda11.0"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda11.0"
+        ;;
     10\.2*)
-        export libcudnn7_version="${CUDNN_VERSION}-1+cuda10.2"
-        export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda10.2"
+        export libcudnn_package="libcudnn7"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda10.2"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda10.2"
         ;;
     10\.1*)
-        export libcudnn7_version="${CUDNN_VERSION}-1+cuda10.1"
-        export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda10.1"
+        export libcudnn_package="libcudnn7"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda10.1"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda10.1"
         ;;
     10\.0*)
-        export libcudnn7_version="${CUDNN_VERSION}-1+cuda10.0"
-        export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda10.0"
+        export libcudnn_package="libcudnn7"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda10.0"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda10.0"
         ;;
     9\.0*)
-        export libcudnn7_version="${CUDNN_VERSION}-1+cuda9.0"
-        export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda9.0"
+        export libcudnn_package="libcudnn7"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda9.0"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda9.0"
         ;;
     9\.2*)
-        export libcudnn7_version="${CUDNN_VERSION}-1+cuda9.2"
-        export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda9.2"
+        export libcudnn_package="libcudnn7"
+        export libcudnn_version="${CUDNN_VERSION}-1+cuda9.2"
+        export libcudnn_dev_version="${CUDNN_VERSION}-1+cuda9.2"
         ;;
     *)
         echo "Unsupported CUDA version ${CUDA_VERSION}"
@@ -58,5 +68,5 @@ case ${CUDA_VERSION} in
         ;;
 esac
 
-apt-get install -y --allow-downgrades libcudnn7=${libcudnn7_version} libcudnn7-dev=${libcudnn7_dev_version}
+apt-get install -y --allow-downgrades ${libcudnn_package}=${libcudnn_version} ${libcudnn_package}-dev=${libcudnn_dev_version}

From 1579f9fc8f122e7262db9880f996d1cf860ce75f Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Mon, 14 Sep 2020 20:50:06 -0700
Subject: [PATCH 02/12] Add new functions for running GPU builds and tests in
 new Cuda11 containers.

---
 ci/jenkins/Jenkins_steps.groovy | 48 +++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 1cc91e4f4247..6dee23e0b0a1 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -239,6 +239,20 @@ def compile_unix_full_gpu() {
     }]
 }
 
+def compile_unix_full_gpu_cu110() {
+    return ['GPU: CUDA11.0+cuDNN8': {
+        node(NODE_LINUX_CPU) {
+            ws('workspace/build-gpu') {
+                timeout(time: max_time, unit: 'MINUTES') {
+                    utils.init_git()
+                    utils.docker_run('ubuntu_build_cuda110', 'build_ubuntu_gpu_cuda110_cudnn8', false)
+                    utils.pack_lib('gpu_cu110', mx_lib_cpp_examples)
+                }
+            }
+        }
+    }]
+}
+
 def compile_unix_full_gpu_mkldnn_cpp_test() {
     return ['GPU: CUDA10.1+cuDNN7+MKLDNN+CPPTEST': {
         node(NODE_LINUX_CPU) {
@@ -737,6 +751,22 @@ def test_unix_python3_gpu() {
     }]
 }
 
+def test_unix_python3_gpu_cu110() {
+    return ['Python3+CUDA11.0: GPU': {
+        node(NODE_LINUX_GPU_G4) {
+            ws('workspace/ut-python3-gpu') {
+                try {
+                    utils.unpack_and_init('gpu_cu110', mx_lib_cython)
+                    python3_gpu_ut_cython('ubuntu_gpu_cu110')
+                    utils.publish_test_coverage()
+                } finally {
+                    utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml')
+                }
+            }
+        }
+    }]
+}
+
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
         node(NODE_LINUX_GPU_P3) {
@@ -755,6 +785,24 @@ def test_unix_python3_quantize_gpu() {
     }]
 }
 
+def test_unix_python3_quantize_gpu_cu110() {
+    return ['Python3+CUDA11.0: Quantize GPU': {
+        node(NODE_LINUX_GPU_P3) {
+            ws('workspace/ut-python3-quantize-gpu') {
+                timeout(time: max_time, unit: 'MINUTES') {
+                    try {
+                        utils.unpack_and_init('gpu_cu110', mx_lib)
+                        utils.docker_run('ubuntu_gpu_cu110', 'unittest_ubuntu_python3_quantization_gpu', true)
+                        utils.publish_test_coverage()
+                    } finally {
+                        utils.collect_test_results_unix('nosetests_quantization_gpu.xml', 'nosetests_python3_quantize_gpu.xml')
+                    }
+                }
+            }
+        }
+    }]
+}
+
 def test_unix_python3_debug_cpu() {
     return ['Python3: CPU debug': {
         node(NODE_LINUX_CPU) {

From bf3d473ec272735a5d5eabc89a6f0e4260e6d386 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Mon, 14 Sep 2020 20:51:57 -0700
Subject: [PATCH 03/12] Add runtime functions for cuda 11.0 related
 builds/tests.
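
The new build target mirrors build_ubuntu_gpu_cuda101_cudnn7, with MKLDNN
and TVM ops switched off for the first CUDA 11.0 build. For local
reproduction, the target can be launched through the CI wrapper the same
way as the existing CUDA builds (a sketch, assuming the usual ci/build.py
entry point and the ubuntu_build_cuda110 image from this series; the
invocation below is illustrative, not part of this patch):

    # Build MXNet with CUDA 11.0/cuDNN 8 inside the new build container.
    ci/build.py --platform ubuntu_build_cuda110 \
        /work/runtime_functions.sh build_ubuntu_gpu_cuda110_cudnn8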
---
 ci/docker/runtime_functions.sh | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index f8f2b570a32d..e6558aac7aa8 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -806,6 +806,24 @@ build_ubuntu_gpu_cuda101_cudnn7() {
     make cython PYTHON=python3
 }
 
+build_ubuntu_gpu_cuda110_cudnn8() {
+    set -ex
+    build_ccache_wrappers
+    make \
+        USE_BLAS=openblas \
+        USE_MKLDNN=0 \
+        USE_CUDA=1 \
+        USE_CUDA_PATH=/usr/local/cuda \
+        USE_CUDNN=1 \
+        USE_TVM_OP=0 \
+        USE_CPP_PACKAGE=1 \
+        USE_DIST_KVSTORE=1 \
+        CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
+        USE_SIGNAL_HANDLER=1 \
+        -j$(nproc)
+    make cython PYTHON=python3
+}
+
 build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
     set -ex
     build_ccache_wrappers
@@ -1083,6 +1101,18 @@ unittest_ubuntu_python3_quantization_gpu() {
     nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
 }
 
+unittest_ubuntu_python3_quantization_gpu_cu110() {
+    set -ex
+    export PYTHONPATH=./python/
+    export MXNET_MKLDNN_DEBUG=0  # Ignored if not present
+    export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
+    export MXNET_SUBGRAPH_VERBOSE=0
+    export CUDNN_VERSION=${CUDNN_VERSION:-8.0.3.33}
+    export MXNET_ENABLE_CYTHON=0
+    export DMLC_LOG_STACK_TRACE_DEPTH=10
+    nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu
+}
+
 unittest_centos7_cpu_scala() {
     set -ex
     cd /work/mxnet

From ade89443465970f9c22284f4a2d65fbf712041cd Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Mon, 14 Sep 2020 20:52:26 -0700
Subject: [PATCH 04/12] Add new pipeline for testing cuda 11.0 builds.

---
 ci/jenkins/Jenkinsfile_unix_gpu_cu110 | 52 +++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 ci/jenkins/Jenkinsfile_unix_gpu_cu110

diff --git a/ci/jenkins/Jenkinsfile_unix_gpu_cu110 b/ci/jenkins/Jenkinsfile_unix_gpu_cu110
new file mode 100644
index 000000000000..3f663dc996dd
--- /dev/null
+++ b/ci/jenkins/Jenkinsfile_unix_gpu_cu110
@@ -0,0 +1,52 @@
+// -*- mode: groovy -*-
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Jenkins pipeline
+// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
+
+// timeout in minutes
+max_time = 180
+
+node('utility') {
+  // Loading the utilities requires a node context unfortunately
+  checkout scm
+  utils = load('ci/Jenkinsfile_utils.groovy')
+  custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
+}
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4')
+
+utils.main_wrapper(
+core_logic: {
+  utils.parallel_stage('Build', [
+    custom_steps.compile_unix_full_gpu_cu110()
+  ])
+
+  utils.parallel_stage('Tests', [
+    custom_steps.test_unix_python3_gpu_cu110(),
+    custom_steps.test_unix_python3_quantize_gpu_cu110()
+  ])
+}
+,
+failure_handler: {
+  // Only send email if master or release branches failed
+  if (currentBuild.result == "FAILURE" && (env.BRANCH_NAME == "master" || env.BRANCH_NAME.startsWith("v"))) {
+    emailext body: 'Build for MXNet branch ${BRANCH_NAME} has broken. Please view the build at ${BUILD_URL}', replyTo: '${EMAIL}', subject: '[BUILD FAILED] Branch ${BRANCH_NAME} build ${BUILD_NUMBER}', to: '${EMAIL}'
+  }
+}
+)

From 7a0e86fcb896607e1ecea71167ee8fc59585758e Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Mon, 14 Sep 2020 20:56:03 -0700
Subject: [PATCH 05/12] Run cuda11 pipeline when sanity completes.

---
 ci/jenkins/Jenkinsfile_full | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/jenkins/Jenkinsfile_full b/ci/jenkins/Jenkinsfile_full
index 415bd7b8dde0..fcdb4c2a5eeb 100644
--- a/ci/jenkins/Jenkinsfile_full
+++ b/ci/jenkins/Jenkinsfile_full
@@ -31,6 +31,7 @@ def buildJobs = [
   'miscellaneous',
   'unix-cpu',
   'unix-gpu',
+  'unix-gpu-cu110',
   'website',
   'windows-cpu',
   'windows-gpu'

From d48040efa375b36bdde564fdff06d91524d0149d Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Tue, 15 Sep 2020 10:01:20 -0700
Subject: [PATCH 06/12] Use base image that already has libcudnn8 installed
 from Nvidia. Remove calls to nvidia/cudnn install scripts.

---
 ci/docker/Dockerfile.build.ubuntu_build_cuda110 | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda110 b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
index 9db96d097531..a90faed4da67 100644
--- a/ci/docker/Dockerfile.build.ubuntu_build_cuda110
+++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
@@ -21,7 +21,7 @@
 # package generation, requiring the actual CUDA library to be
 # present
 
-FROM nvidia/cuda:11.0-devel-ubuntu16.04
+FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04
 
 WORKDIR /work/deps
 
@@ -45,16 +45,6 @@ RUN /work/ubuntu_clang.sh
 COPY install/ubuntu_binutils.sh /work/
 RUN /work/ubuntu_binutils.sh
 
-ENV CUDA_VERSION=11.0.221
-ENV CUDNN_VERSION=8.0.3.33
-COPY install/ubuntu_cudnn.sh /work/
-RUN /work/ubuntu_cudnn.sh
-
-# Special case because the CPP-Package requires the CUDA runtime libs
-# and not only stubs (which are provided by the base image)
-COPY install/ubuntu_nvidia.sh /work/
-RUN /work/ubuntu_nvidia.sh
-
 # Keep this at the end since this command is not cachable
 ARG USER_ID=0
 ARG GROUP_ID=0

From 87895a38a980282009ed96c30faae8c64754ede1 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Tue, 15 Sep 2020 11:22:25 -0700
Subject: [PATCH 07/12] Don't build CPP package for cuda11 build.
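
The CPP package generation requires the actual CUDA runtime libraries,
not just the stubs shipped in the base image, and those are not available
on the CPU-only build hosts, so USE_CPP_PACKAGE is switched off here (it
is re-enabled later in this series once the compat driver is on the
library path). A quick way to tell which mode a tree was built in (a
sketch; the header below is the artifact the cpp-package build step
normally generates):

    # cpp-package/include/mxnet-cpp/op.h is generated at build time, so
    # its absence indicates USE_CPP_PACKAGE=0 took effect.
    if [ -f cpp-package/include/mxnet-cpp/op.h ]; then
        echo "CPP package was built"
    else
        echo "CPP package was skipped"
    fi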
---
 ci/docker/runtime_functions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index e6558aac7aa8..6981045dc552 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -816,7 +816,7 @@ build_ubuntu_gpu_cuda110_cudnn8() {
         USE_CUDA_PATH=/usr/local/cuda \
         USE_CUDNN=1 \
         USE_TVM_OP=0 \
-        USE_CPP_PACKAGE=1 \
+        USE_CPP_PACKAGE=0 \
         USE_DIST_KVSTORE=1 \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1 \

From b8c04d6cb1cb35558eea8ccb74d086c3b03b41b0 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Tue, 15 Sep 2020 11:22:54 -0700
Subject: [PATCH 08/12] Use proper base docker image for testing (include
 cudnn8) and don't manually install cudnn8.

---
 ci/docker/Dockerfile.build.ubuntu_gpu_cu110 | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
index 2f0644050d2b..3a0cea0677d9 100644
--- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
@@ -18,7 +18,7 @@
 #
 # Dockerfile to run MXNet on Ubuntu 16.04 for GPU
 
-FROM nvidia/cuda:11.0-devel-ubuntu16.04
+FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04
 
 WORKDIR /work/deps
 
@@ -65,11 +65,6 @@ RUN /work/ubuntu_docs.sh
 COPY install/ubuntu_tutorials.sh /work/
 RUN /work/ubuntu_tutorials.sh
 
-ENV CUDA_VERSION=11.0.221
-ENV CUDNN_VERSION=8.0.3.33
-COPY install/ubuntu_cudnn.sh /work/
-RUN /work/ubuntu_cudnn.sh
-
 # Always last
 ARG USER_ID=0
 ARG GROUP_ID=0

From 1ac0b7d676997224f5de2f75da7e23eb47361de1 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Tue, 15 Sep 2020 21:20:29 +0000
Subject: [PATCH 09/12] Re-enable CPP package build.

---
 ci/docker/runtime_functions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 6981045dc552..e6558aac7aa8 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -816,7 +816,7 @@ build_ubuntu_gpu_cuda110_cudnn8() {
         USE_CUDA_PATH=/usr/local/cuda \
         USE_CUDNN=1 \
         USE_TVM_OP=0 \
-        USE_CPP_PACKAGE=0 \
+        USE_CPP_PACKAGE=1 \
         USE_DIST_KVSTORE=1 \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1 \

From b969cda78de31a54a88d3b85cb4d3095cad16785 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Tue, 15 Sep 2020 21:21:28 +0000
Subject: [PATCH 10/12] Add env variable LD_LIBRARY_PATH in the build
 container so cpp-package build works. Remove unneeded components of docker
 containers to reduce size and build time.
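
The cpp-package generator runs at build time and must resolve the driver
library even on hosts without a GPU; the compat directory in the
nvidia/cuda base image provides a user-mode libcuda for exactly this
case. A minimal in-container sanity check (a sketch, assuming the usual
nvidia/cuda:11.0 image layout):

    # Confirm the compat driver is present and resolvable now that
    # /usr/local/cuda/compat is on LD_LIBRARY_PATH.
    ls -l /usr/local/cuda/compat/libcuda.so.1
    ldconfig -p | grep -F libcuda.so.1 || echo "resolved via LD_LIBRARY_PATH only"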
---
 .../Dockerfile.build.ubuntu_build_cuda110   | 14 ++------
 ci/docker/Dockerfile.build.ubuntu_gpu_cu110 | 33 -------------------
 2 files changed, 2 insertions(+), 45 deletions(-)

diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda110 b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
index a90faed4da67..d4ca059dbf06 100644
--- a/ci/docker/Dockerfile.build.ubuntu_build_cuda110
+++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
@@ -32,18 +32,6 @@ RUN /work/deb_ubuntu_ccache.sh
 COPY install/ubuntu_python.sh /work/
 COPY install/requirements /work/
 RUN /work/ubuntu_python.sh
-COPY install/ubuntu_scala.sh /work/
-COPY install/sbt.gpg /work/
-RUN /work/ubuntu_scala.sh
-COPY install/ubuntu_r.sh /work/
-COPY install/r.gpg /work/
-RUN /work/ubuntu_r.sh
-COPY install/ubuntu_perl.sh /work/
-RUN /work/ubuntu_perl.sh
-COPY install/ubuntu_clang.sh /work/
-RUN /work/ubuntu_clang.sh
-COPY install/ubuntu_binutils.sh /work/
-RUN /work/ubuntu_binutils.sh
 
 # Keep this at the end since this command is not cachable
 ARG USER_ID=0
@@ -54,3 +42,5 @@ RUN /work/ubuntu_adduser.sh
 COPY runtime_functions.sh /work/
 
 WORKDIR /work/mxnet
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
+
diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
index 3a0cea0677d9..336b76e2a3e8 100644
--- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
@@ -32,39 +32,6 @@ COPY install/ubuntu_python.sh /work/
 COPY install/requirements /work/
 RUN /work/ubuntu_python.sh
 
-COPY install/ubuntu_scala.sh /work/
-COPY install/sbt.gpg /work/
-RUN /work/ubuntu_scala.sh
-
-COPY install/ubuntu_r.sh /work/
-COPY install/r.gpg /work/
-RUN /work/ubuntu_r.sh
-
-COPY install/ubuntu_perl.sh /work/
-RUN /work/ubuntu_perl.sh
-
-COPY install/ubuntu_clang.sh /work/
-RUN /work/ubuntu_clang.sh
-
-COPY install/ubuntu_tvm.sh /work/
-RUN /work/ubuntu_tvm.sh
-
-COPY install/ubuntu_llvm.sh /work/
-RUN /work/ubuntu_llvm.sh
-
-COPY install/ubuntu_caffe.sh /work/
-RUN /work/ubuntu_caffe.sh
-
-COPY install/ubuntu_onnx.sh /work/
-RUN /work/ubuntu_onnx.sh
-
-COPY install/ubuntu_docs.sh /work/
-COPY install/requirements /work/
-RUN /work/ubuntu_docs.sh
-
-COPY install/ubuntu_tutorials.sh /work/
-RUN /work/ubuntu_tutorials.sh
-
 # Always last
 ARG USER_ID=0
 ARG GROUP_ID=0

From 5345be064756f4f45c8f7b5c1272c2a5b15cd901 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Wed, 16 Sep 2020 00:37:28 +0000
Subject: [PATCH 11/12] Add sm_80 and compute_80 to compiled cuda
 architectures.
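
The explicit -gencode list replaces CI_CUDA_COMPUTE_CAPABILITIES,
presumably because that default set predates the Ampere (sm_80)
architecture. To confirm which targets actually end up embedded in the
resulting library (a sketch; cuobjdump ships with the CUDA toolkit):

    # List the SASS targets compiled into libmxnet.so; sm_52, sm_70 and
    # sm_80 should all appear after this change.
    cuobjdump --list-elf lib/libmxnet.so | sort -u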
---
 ci/docker/runtime_functions.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index e6558aac7aa8..d04f98722ff9 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -809,6 +809,9 @@ build_ubuntu_gpu_cuda101_cudnn7() {
 build_ubuntu_gpu_cuda110_cudnn8() {
     set -ex
     build_ccache_wrappers
+    local CUDA_ARCH="-gencode=arch=compute_52,code=sm_52 \
+                     -gencode=arch=compute_70,code=sm_70 \
+                     -gencode=arch=compute_80,code=sm_80"
     make \
         USE_BLAS=openblas \
         USE_MKLDNN=0 \
@@ -818,7 +821,7 @@ build_ubuntu_gpu_cuda110_cudnn8() {
         USE_TVM_OP=0 \
         USE_CPP_PACKAGE=1 \
         USE_DIST_KVSTORE=1 \
-        CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
+        CUDA_ARCH="$CUDA_ARCH" \
         USE_SIGNAL_HANDLER=1 \
         -j$(nproc)
     make cython PYTHON=python3

From 5b52f84e549ad7e109506c1a80a5a690fafc6024 Mon Sep 17 00:00:00 2001
From: Joe Evans
Date: Wed, 16 Sep 2020 00:43:04 +0000
Subject: [PATCH 12/12] Add back binutils install since we are building for
 more cuda architectures and will hit the ar limit.

---
 ci/docker/Dockerfile.build.ubuntu_build_cuda110 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda110 b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
index d4ca059dbf06..3dcb3d6595be 100644
--- a/ci/docker/Dockerfile.build.ubuntu_build_cuda110
+++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda110
@@ -32,6 +32,8 @@ RUN /work/deb_ubuntu_ccache.sh
 COPY install/ubuntu_python.sh /work/
 COPY install/requirements /work/
 RUN /work/ubuntu_python.sh
+COPY install/ubuntu_binutils.sh /work/
+RUN /work/ubuntu_binutils.sh
 
 # Keep this at the end since this command is not cachable
 ARG USER_ID=0
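
With three fat-binary targets, the produced archives grow past the size
limit of the stock Ubuntu 16.04 ar, hence the binutils reinstall. A quick
check that the updated toolchain is the one picked up (a sketch, assuming
ubuntu_binutils.sh puts the newer binutils ahead of the system one on
PATH):

    # Older ar fails on very large archive members; verify which binary
    # and version the build will use.
    command -v ar
    ar --version | head -n1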