From 683f3d4ab9b26c1f35242165f14321bb7b22fe49 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Sun, 17 Jan 2021 20:14:07 -0500 Subject: [PATCH 1/2] add build variant for cuda 11.2 Signed-off-by: Sheng Zha --- cd/Jenkinsfile_cd_pipeline | 2 +- cd/Jenkinsfile_release_job | 2 +- cd/README.md | 5 ++-- cd/utils/artifact_repository.md | 2 +- cd/utils/mxnet_base_image.sh | 3 ++ ci/docker/docker-compose.yml | 10 +++++++ tools/pip/doc/CPU_ADDITIONAL.md | 1 + tools/pip/doc/CU101_ADDITIONAL.md | 1 + tools/pip/doc/CU102_ADDITIONAL.md | 1 + tools/pip/doc/CU110_ADDITIONAL.md | 1 + tools/pip/doc/CU112_ADDITIONAL.md | 44 ++++++++++++++++++++++++++++++ tools/pip/doc/NATIVE_ADDITIONAL.md | 1 + tools/pip/setup.py | 4 ++- tools/staticbuild/README.md | 4 +-- 14 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 tools/pip/doc/CU112_ADDITIONAL.md diff --git a/cd/Jenkinsfile_cd_pipeline b/cd/Jenkinsfile_cd_pipeline index 3a16569144ba..6a64c243b06f 100644 --- a/cd/Jenkinsfile_cd_pipeline +++ b/cd/Jenkinsfile_cd_pipeline @@ -36,7 +36,7 @@ pipeline { parameters { // Release parameters - string(defaultValue: "cpu,native,cu101,cu102,cu110", description: "Comma separated list of variants", name: "MXNET_VARIANTS") + string(defaultValue: "cpu,native,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS") booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD") } diff --git a/cd/Jenkinsfile_release_job b/cd/Jenkinsfile_release_job index 1520880881e7..6b7784be7ade 100644 --- a/cd/Jenkinsfile_release_job +++ b/cd/Jenkinsfile_release_job @@ -43,7 +43,7 @@ pipeline { // any disruption caused by different COMMIT_ID values chaning the job parameter configuration on // Jenkins. 
string(defaultValue: "mxnet_lib/static", description: "Pipeline to build", name: "RELEASE_JOB_TYPE") - string(defaultValue: "cpu,native,cu101,cu102,cu110", description: "Comma separated list of variants", name: "MXNET_VARIANTS") + string(defaultValue: "cpu,native,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS") booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD") } diff --git a/cd/README.md b/cd/README.md index b04e61d928fe..4967f1d0f17b 100644 --- a/cd/README.md +++ b/cd/README.md @@ -25,7 +25,7 @@ MXNet aims to support a variety of frontends, e.g. Python, Java, Perl, R, etc. a The CD process is driven by the [CD pipeline job](Jenkinsfile_cd_pipeline), which orchestrates the order in which the artifacts are delivered. For instance, first publish the libmxnet library before publishing the pip package. It does this by triggering the [release job](Jenkinsfile_release_job) with a specific set of parameters for each delivery channel. The release job executes the specific release pipeline for a delivery channel across all MXNet *variants*. -A variant is a specific environment or features for which MXNet is compiled. For instance CPU, GPU with CUDA v10.1, CUDA v10.2 with MKL-DNN support, etc. +A variant is a specific environment or features for which MXNet is compiled. For instance CPU, GPU with CUDA v10.1, CUDA v10.2 with MKL-DNN support, etc. Currently, below variants are supported. All of these variants except native have MKL-DNN backend enabled. @@ -34,6 +34,7 @@ Currently, below variants are supported. 
All of these variants except native hav * *cu101*: CUDA 10.1 * *cu102*: CUDA 10.2 * *cu110*: CUDA 11.0 +* *cu112*: CUDA 11.2 *For more on variants, see [here](https://github.com/apache/incubator-mxnet/issues/8671)* @@ -119,7 +120,7 @@ The "first mile" of the CD process is posting the mxnet binaries to the [artifac ##### Timeout -We shouldn't set global timeouts for the pipelines. Rather, the `step` being executed should be rapped with a `timeout` function (as in the pipeline example above). The `max_time` is a global variable set at the [release job](Jenkinsfile_release_job) level. +We shouldn't set global timeouts for the pipelines. Rather, the `step` being executed should be wrapped with a `timeout` function (as in the pipeline example above). The `max_time` is a global variable set at the [release job](Jenkinsfile_release_job) level. ##### Node of execution diff --git a/cd/utils/artifact_repository.md b/cd/utils/artifact_repository.md index a7d8ab3c690d..c37646b51a66 100644 --- a/cd/utils/artifact_repository.md +++ b/cd/utils/artifact_repository.md @@ -53,7 +53,7 @@ If not set, derived through the value of sys.platform (https://docs.python.org/3 **Variant** -Manually configured through the --variant argument. The current variants are: cpu, native, cu101, cu102, cu110. +Manually configured through the --variant argument. The current variants are: cpu, native, cu101, cu102, cu110, cu112. As long as the tool is being run from the MXNet code base, the runtime feature detection tool (https://github.com/larroy/mxnet/blob/dd432b7f241c9da2c96bcb877c2dc84e6a1f74d4/docs/api/python/libinfo/libinfo.md) can be used to detect whether the library has been compiled with MKL (library has MKL-DNN feature enabled) and/or CUDA support (compiled with CUDA feature enabled). 
diff --git a/cd/utils/mxnet_base_image.sh b/cd/utils/mxnet_base_image.sh index fd96f37f7afe..a30edfdfc528 100755 --- a/cd/utils/mxnet_base_image.sh +++ b/cd/utils/mxnet_base_image.sh @@ -30,6 +30,9 @@ case ${mxnet_variant} in cu110*) echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu16.04" ;; + cu112*) + echo "nvidia/cuda:11.2-cudnn8-runtime-ubuntu16.04" + ;; cpu) echo "ubuntu:16.04" ;; diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml index 7c0ef4382087..5b197ab4e402 100644 --- a/ci/docker/docker-compose.yml +++ b/ci/docker/docker-compose.yml @@ -71,6 +71,16 @@ services: BASE_IMAGE: nvidia/cuda:11.0-cudnn8-devel-centos7 cache_from: - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu110:latest + centos7_gpu_cu112: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu112:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:11.2-cudnn8-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu112:latest ################################################################################################### # Dockerfile.build.ubuntu based images. On Ubuntu we test more recent # toolchain and dependency versions compared to CentOS7. We attempt to update diff --git a/tools/pip/doc/CPU_ADDITIONAL.md b/tools/pip/doc/CPU_ADDITIONAL.md index 02edca22fa32..d5b2f101c662 100644 --- a/tools/pip/doc/CPU_ADDITIONAL.md +++ b/tools/pip/doc/CPU_ADDITIONAL.md @@ -18,6 +18,7 @@ Prerequisites ------------- This package supports Linux, Mac OSX, and Windows platforms. You may also want to check: +- [mxnet-cu112](https://pypi.python.org/pypi/mxnet-cu112/) with CUDA-11.2 support. - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support. - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support. - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support. 
diff --git a/tools/pip/doc/CU101_ADDITIONAL.md b/tools/pip/doc/CU101_ADDITIONAL.md index 761c950dc0c1..3d2c53573c68 100644 --- a/tools/pip/doc/CU101_ADDITIONAL.md +++ b/tools/pip/doc/CU101_ADDITIONAL.md @@ -18,6 +18,7 @@ Prerequisites ------------- This package supports Linux and Windows platforms. You may also want to check: +- [mxnet-cu112](https://pypi.python.org/pypi/mxnet-cu112/) with CUDA-11.2 support. - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support. - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support. - [mxnet](https://pypi.python.org/pypi/mxnet/). diff --git a/tools/pip/doc/CU102_ADDITIONAL.md b/tools/pip/doc/CU102_ADDITIONAL.md index 412359b8f279..6f62d56ed5cd 100644 --- a/tools/pip/doc/CU102_ADDITIONAL.md +++ b/tools/pip/doc/CU102_ADDITIONAL.md @@ -18,6 +18,7 @@ Prerequisites ------------- This package supports Linux and Windows platforms. You may also want to check: +- [mxnet-cu112](https://pypi.python.org/pypi/mxnet-cu112/) with CUDA-11.2 support. - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support. - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support. - [mxnet](https://pypi.python.org/pypi/mxnet/). diff --git a/tools/pip/doc/CU110_ADDITIONAL.md b/tools/pip/doc/CU110_ADDITIONAL.md index 03295e79181d..d442a8ea2bcf 100644 --- a/tools/pip/doc/CU110_ADDITIONAL.md +++ b/tools/pip/doc/CU110_ADDITIONAL.md @@ -18,6 +18,7 @@ Prerequisites ------------- This package supports Linux and Windows platforms. You may also want to check: +- [mxnet-cu112](https://pypi.python.org/pypi/mxnet-cu112/) with CUDA-11.2 support. - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support. - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support. - [mxnet](https://pypi.python.org/pypi/mxnet/). 
diff --git a/tools/pip/doc/CU112_ADDITIONAL.md b/tools/pip/doc/CU112_ADDITIONAL.md new file mode 100644 index 000000000000..206c96a2e871 --- /dev/null +++ b/tools/pip/doc/CU112_ADDITIONAL.md @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + +Prerequisites +------------- +This package supports Linux and Windows platforms. You may also want to check: +- [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support. +- [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support. +- [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support. +- [mxnet](https://pypi.python.org/pypi/mxnet/). +- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN. + +To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html). + +To use this package on Linux you need the `libquadmath.so.0` shared library. On +Debian based systems, including Ubuntu, run `sudo apt install libquadmath0` to +install the shared library. On RHEL based systems, including CentOS, run `sudo +yum install libquadmath` to install the shared library. As `libquadmath.so.0` is +a GPL library and MXNet is part of the Apache Software Foundation, MXNet must not +redistribute `libquadmath.so.0` as part of the PyPI package and users must +manually install it. + +To install for other platforms (e.g. Windows, Raspberry Pi/ARM) or other versions, check [Installing MXNet](https://mxnet.apache.org/versions/master) for instructions on building from source. 
+ +Installation +------------ +To install: +```bash +pip install mxnet-cu112 +``` diff --git a/tools/pip/doc/NATIVE_ADDITIONAL.md b/tools/pip/doc/NATIVE_ADDITIONAL.md index f3f44ec5f202..ef26962fd4ef 100644 --- a/tools/pip/doc/NATIVE_ADDITIONAL.md +++ b/tools/pip/doc/NATIVE_ADDITIONAL.md @@ -18,6 +18,7 @@ Prerequisites ------------- This package supports Linux and Windows platforms. You may also want to check: +- [mxnet-cu112](https://pypi.python.org/pypi/mxnet-cu112/) with CUDA-11.2 support. - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support. - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support. - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support. diff --git a/tools/pip/setup.py b/tools/pip/setup.py index 97602e5670cf..e96ff9fe4428 100644 --- a/tools/pip/setup.py +++ b/tools/pip/setup.py @@ -130,7 +130,9 @@ def skip_markdown_comments(md): if variant == 'CPU': libraries.append('openblas') else: - if variant.startswith('CU110'): + if variant.startswith('CU112'): + libraries.append('CUDA-11.2') + elif variant.startswith('CU110'): libraries.append('CUDA-11.0') elif variant.startswith('CU102'): libraries.append('CUDA-10.2') diff --git a/tools/staticbuild/README.md b/tools/staticbuild/README.md index 57a7acfb14e8..684ca9826080 100644 --- a/tools/staticbuild/README.md +++ b/tools/staticbuild/README.md @@ -28,9 +28,9 @@ You need to install `patchelf` first, for example via `apt install patchelf` on Ubuntu systems. ``` -tools/staticbuild/build.sh cu102 +tools/staticbuild/build.sh cu112 ``` -This would build the mxnet package based on CUDA 10.2. Currently, we support variants cpu, native, cu101, cu102 and cu110. All of these variants expect native have MKL-DNN backend enabled. +This would build the mxnet package based on CUDA 11.2. Currently, we support variants cpu, native, cu101, cu102, cu110, and cu112. All of these variants except native have MKL-DNN backend enabled. 
``` tools/staticbuild/build.sh cpu From 32434e800c0f8c7306ff3c5dda94ccafff559425 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Fri, 5 Feb 2021 10:30:31 -0500 Subject: [PATCH 2/2] update tag name Signed-off-by: Sheng Zha --- ci/docker/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml index 5b197ab4e402..721b5664643b 100644 --- a/ci/docker/docker-compose.yml +++ b/ci/docker/docker-compose.yml @@ -78,7 +78,7 @@ services: dockerfile: Dockerfile.build.centos7 target: base args: - BASE_IMAGE: nvidia/cuda:11.2-cudnn8-devel-centos7 + BASE_IMAGE: nvidia/cuda:11.2.0-cudnn8-devel-centos7 cache_from: - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu112:latest ###################################################################################################