diff --git a/.gitignore b/.gitignore
index c7530ab69c6a..7eb8e7d6e777 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,10 +167,6 @@ python/.eggs
tests/Makefile
tests/mxnet_unit_tests
-# generated wrappers for ccache
-cc
-cxx
-
# Code coverage related
.coverage
*.gcov
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3b8bbd2e0272..f09a9c29e9d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON)
mxnet_option(USE_LAPACK "Build with lapack support" ON)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLML_MKL "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE))
-mxnet_option(USE_MKLDNN "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE))
+mxnet_option(USE_MKLDNN "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE)
mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON IF NOT MSVC)
mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support (if found)" ON)
mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON)
diff --git a/CODEOWNERS b/CODEOWNERS
index 5a88e89dfb02..ce648ef2e087 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -13,14 +13,14 @@
# Language bindings
/R-package/ @thirdwing
-/scala-package/ @yzhliu @nswamy
+/scala-package/ @yzhliu @nswamy @pllarroy
/perl-package/ @sergeykolychev
-/python/ @szha
+/python/ @szha @pllarroy
/contrib/clojure-package/ @gigasquid
# C++ base
/src/kvstore/ @rahul003 @anirudh2290
-/include/ @anirudh2290
+/include/ @anirudh2290 @pllarroy
/src/c_api/ @anirudh2290
/src/common/ @anirudh2290
/src/engine/ @anirudh2290
@@ -33,13 +33,17 @@
/src/profiler/ @anirudh2290
/src/storage/ @anirudh2290
/tests/cpp/ @anirudh2290
-/cpp-package/ @nswamy
+/cpp-package/ @nswamy @pllarroy
+/src/ @pllarroy
+/plugin/ @pllarroy
# CMake
-CMakeLists.txt @szha @rahul003
-/cmake/ @szha @rahul003
+CMakeLists.txt @szha @rahul003 @pllarroy
+/cmake/ @szha @rahul003 @pllarroy
# MXNet CI
+dev_menu.py @pllarroy
+/ci/ @pllarroy
/tests/ci_build/ @marcoabreu
Jenkinsfile @marcoabreu
.travis.yml @marcoabreu
@@ -50,16 +54,16 @@ Makefile @szha
prepare_mkl.sh @szha
# Docs
-/docs/ @szha
+/docs/ @szha @pllarroy
# Submodules
.gitmodules @szha
# Examples
-/example/ @szha
+/example/ @szha @pllarroy
# Tools
-/tools/ @szha
+/tools/ @szha @pllarroy
# Github templates
/.github/ @szha
diff --git a/MKLDNN_README.md b/MKLDNN_README.md
index 2618d23388e7..6b25fee85195 100644
--- a/MKLDNN_README.md
+++ b/MKLDNN_README.md
@@ -1,9 +1,9 @@
# Build/Install MXNet with MKL-DNN
-A better training and inference perforamce are expected to achieved on Intel-Architecture CPUs with MXNET built with [Intel MKL-DNN](https://github.com/intel/mkl-dnn) on multiple operating system, including Linux, Windows and MacOS.
-In the following sections, you will find building instructions for MXNET with Intel MKL-DNN on Linux, MacOS and Windows.
+A better training and inference performance is expected to be achieved on Intel-Architecture CPUs with MXNet built with [Intel MKL-DNN](https://github.com/intel/mkl-dnn) on multiple operating systems, including Linux, Windows and MacOS.
+In the following sections, you will find build instructions for MXNet with Intel MKL-DNN on Linux, MacOS and Windows.
-The detailed performance data collected on Intel Xeon CPU with MXNET built with Intel MKL-DNN can be found at [here](https://mxnet.incubator.apache.org/faq/perf.html#intel-cpu).
+The detailed performance data collected on Intel Xeon CPU with MXNet built with Intel MKL-DNN can be found [here](https://mxnet.incubator.apache.org/faq/perf.html#intel-cpu).
Contents
@@ -83,7 +83,7 @@ LIBRARY_PATH=$(brew --prefix llvm)/lib/ make -j $(sysctl -n hw.ncpu) CC=$(brew -
Windows
-On Windows, you can use [Micrsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) and [Microsoft Visual Studio 2017](https://www.visualstudio.com/downloads/) to compile MXNET with Intel MKL-DNN.
+On Windows, you can use [Microsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) and [Microsoft Visual Studio 2017](https://www.visualstudio.com/downloads/) to compile MXNet with Intel MKL-DNN.
[Micrsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) is recommended.
**Visual Studio 2015**
@@ -123,7 +123,7 @@ cmake -G "Visual Studio 14 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -D
These commands produce a library called ```libmxnet.dll``` in the ```./build/Release/``` or ```./build/Debug``` folder.
Also ```libmkldnn.dll``` with be in the ```./build/3rdparty/mkldnn/src/Release/```
-6. Make sure that all the dll files used above(such as `libmkldnn.dll`, `libmklml.dll`, `libiomp5.dll`, `libopenblas.dll`, etc) are added to the system PATH. For convinence, you can put all of them to ```\windows\system32```. Or you will come across `Not Found Dependencies` when loading mxnet.
+6. Make sure that all the dll files used above (such as `libmkldnn.dll`, `libmklml.dll`, `libiomp5.dll`, `libopenblas.dll`, etc) are added to the system PATH. For convenience, you can put all of them to ```\windows\system32```. Or you will come across `Not Found Dependencies` when loading MXNet.
**Visual Studio 2017**
@@ -177,7 +177,7 @@ cmake -G "Visual Studio 15 2017 Win64" .. -T host=x64 -DUSE_CUDA=0 -DUSE_CUDNN=0
msbuild mxnet.sln /p:Configuration=Release;Platform=x64 /maxcpucount
```
-9. Make sure that all the dll files used above(such as `libmkldnn.dll`, `libmklml.dll`, `libiomp5.dll`, `libopenblas.dll`, etc) are added to the system PATH. For convinence, you can put all of them to ```\windows\system32```. Or you will come across `Not Found Dependencies` when loading mxnet.
+9. Make sure that all the dll files used above (such as `libmkldnn.dll`, `libmklml.dll`, `libiomp5.dll`, `libopenblas.dll`, etc) are added to the system PATH. For convenience, you can put all of them to ```\windows\system32```. Or you will come across `Not Found Dependencies` when loading MXNet.
Verify MXNet with python
diff --git a/Makefile b/Makefile
index f15968bfe526..08539df7ea12 100644
--- a/Makefile
+++ b/Makefile
@@ -60,6 +60,10 @@ endif
# use customized config file
include $(config)
+ifndef USE_MKLDNN
+ USE_MKLDNN = 1
+endif
+
ifeq ($(USE_MKL2017), 1)
$(warning "USE_MKL2017 is deprecated. We will switch to USE_MKLDNN.")
USE_MKLDNN=1
@@ -463,7 +467,7 @@ build/src/%.o: src/%.cc | mkldnn
build/src/%_gpu.o: src/%.cu | mkldnn
@mkdir -p $(@D)
- $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" -M -MT build/src/$*_gpu.o $< >build/src/$*_gpu.d
+ $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" --generate-dependencies -MT build/src/$*_gpu.o $< >build/src/$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<
# A nvcc bug cause it to generate "generic/xxx.h" dependencies from torch headers.
@@ -479,7 +483,7 @@ build/plugin/%.o: plugin/%.cc
%_gpu.o: %.cu
@mkdir -p $(@D)
- $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" -M -MT $*_gpu.o $< >$*_gpu.d
+ $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" --generate-dependencies -MT $*_gpu.o $< >$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS) -Isrc/operator" $<
%.o: %.cc $(CORE_INC)
@@ -686,7 +690,7 @@ rclean:
ifneq ($(EXTRA_OPERATORS),)
clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
- $(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~
+ $(RM) -r build lib bin deps *~ */*~ */*/*~ */*/*/*~
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
diff --git a/ci/docker/Dockerfile.build.android_armv7 b/ci/docker/Dockerfile.build.android_armv7
index c601fc5e5ff7..a2e98cd2efe1 100644
--- a/ci/docker/Dockerfile.build.android_armv7
+++ b/ci/docker/Dockerfile.build.android_armv7
@@ -75,6 +75,11 @@ ENV OpenBLAS_DIR=${CROSS_ROOT}
WORKDIR /work
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
COPY runtime_functions.sh /work/
WORKDIR /work/mxnet
diff --git a/ci/docker/Dockerfile.build.android_armv8 b/ci/docker/Dockerfile.build.android_armv8
index 60376b8efda2..f7de86763457 100644
--- a/ci/docker/Dockerfile.build.android_armv8
+++ b/ci/docker/Dockerfile.build.android_armv8
@@ -74,6 +74,12 @@ ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++
COPY install/android_arm64_openblas.sh /work/
RUN /work/android_arm64_openblas.sh
ENV CPLUS_INCLUDE_PATH /work/deps/OpenBLAS
-WORKDIR /work/build
+
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
COPY runtime_functions.sh /work/
+
+WORKDIR /work/build
\ No newline at end of file
diff --git a/ci/docker/Dockerfile.build.armv6 b/ci/docker/Dockerfile.build.armv6
index 6f16d8c77a0a..60e223b7a60f 100644
--- a/ci/docker/Dockerfile.build.armv6
+++ b/ci/docker/Dockerfile.build.armv6
@@ -38,5 +38,10 @@ ENV OpenBLAS_DIR=${CROSS_ROOT}
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
COPY runtime_functions.sh /work/
WORKDIR /work/mxnet
diff --git a/ci/docker/Dockerfile.build.armv7 b/ci/docker/Dockerfile.build.armv7
index 5f0223448f12..0b557d5839e9 100644
--- a/ci/docker/Dockerfile.build.armv7
+++ b/ci/docker/Dockerfile.build.armv7
@@ -38,5 +38,10 @@ ENV OpenBLAS_DIR=${CROSS_ROOT}
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
COPY runtime_functions.sh /work/
WORKDIR /work/mxnet
diff --git a/ci/docker/Dockerfile.build.armv8 b/ci/docker/Dockerfile.build.armv8
index 27bd425ae9b7..ef9c95865590 100644
--- a/ci/docker/Dockerfile.build.armv8
+++ b/ci/docker/Dockerfile.build.armv8
@@ -42,5 +42,10 @@ ENV OpenBLAS_DIR=${CROSS_ROOT}
COPY install/deb_ubuntu_ccache.sh /work/
RUN /work/deb_ubuntu_ccache.sh
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
COPY runtime_functions.sh /work/
WORKDIR /work/build
diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson
index d128ebc7e2a7..07097887f87d 100644
--- a/ci/docker/Dockerfile.build.jetson
+++ b/ci/docker/Dockerfile.build.jetson
@@ -77,10 +77,16 @@ RUN JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcen
dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \
dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
apt update -y || true && apt install -y cuda-libraries-dev-9-0 libcudnn7-dev libnvinfer-dev
+RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h /usr/include/aarch64-linux-gnu/cudnn.h
ENV PATH $PATH:/usr/local/cuda/bin
ENV NVCCFLAGS "-m64"
ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62"
ENV NVCC /usr/local/cuda/bin/nvcc
+ARG USER_ID=0
+ARG GROUP_ID=0
+COPY install/ubuntu_adduser.sh /work/
+RUN /work/ubuntu_adduser.sh
+
COPY runtime_functions.sh /work/
WORKDIR /work/mxnet
diff --git a/ci/docker/install/centos7_adduser.sh b/ci/docker/install/centos7_adduser.sh
index ba72c9b92281..f9d2402c9554 100755
--- a/ci/docker/install/centos7_adduser.sh
+++ b/ci/docker/install/centos7_adduser.sh
@@ -34,4 +34,9 @@ then
mkdir /work/mxnet
mkdir /work/build
chown -R jenkins_slave /work/
+
+ # Later on, we have to override the links because underlying build systems ignore our compiler settings. Thus,
+ # we have to give the process the proper permission to these files. This is hacky, but unfortunately
+ # there's no better way to do this without patching all our submodules.
+ chown -R jenkins_slave /usr/local/bin
fi
diff --git a/ci/docker/install/ubuntu_adduser.sh b/ci/docker/install/ubuntu_adduser.sh
index 515a80f63b07..a7668bac2ab6 100755
--- a/ci/docker/install/ubuntu_adduser.sh
+++ b/ci/docker/install/ubuntu_adduser.sh
@@ -40,4 +40,9 @@ then
mkdir /work/mxnet
mkdir /work/build
chown -R jenkins_slave /work/
+
+ # Later on, we have to override the links because underlying build systems ignore our compiler settings. Thus,
+ # we have to give the process the proper permission to these files. This is hacky, but unfortunately
+ # there's no better way to do this without patching all our submodules.
+ chown -R jenkins_slave /usr/local/bin
fi
diff --git a/ci/docker/install/ubuntu_publish.sh b/ci/docker/install/ubuntu_publish.sh
new file mode 100644
index 000000000000..bc3513dd13e5
--- /dev/null
+++ b/ci/docker/install/ubuntu_publish.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build on Ubuntu 14.04 LTS for LINUX CPU/GPU
+apt-get update
+apt-get install -y software-properties-common
+add-apt-repository ppa:ubuntu-toolchain-r/test -y
+add-apt-repository ppa:openjdk-r/ppa -y # Java lib
+apt-get update
+apt-get install -y git \
+ cmake3 \
+ libcurl4-openssl-dev \
+ unzip \
+ gcc-4.8 \
+ g++-4.8 \
+ gfortran \
+ gfortran-4.8 \
+ binutils \
+ nasm \
+ libtool \
+ curl \
+ pandoc \
+ python3-pip \
+ automake \
+ pkg-config \
+ openjdk-8-jdk
+curl -o apache-maven-3.3.9-bin.tar.gz http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
+tar xzf apache-maven-3.3.9-bin.tar.gz
+mkdir /usr/local/maven
+mv apache-maven-3.3.9/ /usr/local/maven/
+update-alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.3.9/bin/mvn 1
+update-ca-certificates -f
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 6dd5bb6f239d..53cd9ba2d4c6 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -39,32 +39,59 @@ clean_repo() {
build_ccache_wrappers() {
set -ex
- rm -f cc
- rm -f cxx
-
- touch cc
- touch cxx
-
if [ -z ${CC+x} ]; then
echo "No \$CC set, defaulting to gcc";
export CC=gcc
fi
-
- if [ -z ${CXX+x} ]; then
+ if [ -z ${CXX+x} ]; then
echo "No \$CXX set, defaulting to g++";
export CXX=g++
fi
- # this function is nessesary for cuda enabled make based builds, since nvcc needs just an executable for -ccbin
-
- echo -e "#!/bin/sh\n/usr/local/bin/ccache ${CC} \"\$@\"\n" >> cc
- echo -e "#!/bin/sh\n/usr/local/bin/ccache ${CXX} \"\$@\"\n" >> cxx
-
- chmod +x cc
- chmod +x cxx
-
- export CC=`pwd`/cc
- export CXX=`pwd`/cxx
+ # Recommended by CCache: https://ccache.samba.org/manual.html#_run_modes
+ # Add to the beginning of path to ensure this redirection is picked up instead
+ # of the original ones. Especially CUDA/NVCC appends itself to the beginning of the
+ # path and thus this redirect is ignored. This change fixes this problem
+ # This hacky approach with symbolic links is required because underlying build
+ # systems of our submodules ignore our CMake settings. If they use Makefile,
+ # we can't influence them at all in general and NVCC also prefers to hardcode their
+ # compiler instead of respecting the settings. Thus, we take this brutal approach
+ # and just redirect everything once this installer has been called.
+ # In future, we could do these links during image build time of the container.
+ # But in the beginning, we'll make this opt-in. In future, loads of processes like
+ # the scala make step or numpy compilation and other pip package generations
+ # could be heavily sped up by using ccache as well.
+ mkdir -p /tmp/ccache-redirects
+ export PATH=/tmp/ccache-redirects:$PATH
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/gcc
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/gcc-8
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/g++
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/g++-8
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/nvcc
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang++-3.9
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang-3.9
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang++-5.0
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang-5.0
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang++-6.0
+ ln -sf /usr/local/bin/ccache /tmp/ccache-redirects/clang-6.0
+ ln -sf /usr/local/bin/ccache /usr/local/bin/gcc
+ ln -sf /usr/local/bin/ccache /usr/local/bin/gcc-8
+ ln -sf /usr/local/bin/ccache /usr/local/bin/g++
+ ln -sf /usr/local/bin/ccache /usr/local/bin/g++-8
+ ln -sf /usr/local/bin/ccache /usr/local/bin/nvcc
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang++-3.9
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang-3.9
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang++-5.0
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang-5.0
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang++-6.0
+ ln -sf /usr/local/bin/ccache /usr/local/bin/clang-6.0
+
+ export NVCC=ccache
+
+ # Uncomment if you would like to debug CCache hit rates.
+ # You can monitor using tail -f ccache-log
+ # export CCACHE_LOGFILE=/work/mxnet/ccache-log
+ # export CCACHE_DEBUG=1
}
build_wheel() {
@@ -106,6 +133,8 @@ build_jetson() {
set -ex
pushd .
+ #build_ccache_wrappers
+
cp make/crosscompile.jetson.mk ./config.mk
make -j$(nproc)
@@ -129,6 +158,7 @@ build_armv6() {
# We do not need OpenMP, since most armv6 systems have only 1 core
+ build_ccache_wrappers
cmake \
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
@@ -159,6 +189,7 @@ build_armv7() {
# file tries to add -llapack. Lapack functionality though, requires -lgfortran
# to be linked additionally.
+ build_ccache_wrappers
cmake \
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \
-DCMAKE_CROSSCOMPILING=ON \
@@ -181,6 +212,7 @@ build_armv7() {
}
build_armv8() {
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
@@ -205,6 +237,7 @@ build_armv8() {
build_android_armv7() {
set -ex
cd /work/build
+ build_ccache_wrappers
cmake \
-DANDROID=ON\
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
@@ -225,6 +258,7 @@ build_android_armv7() {
build_android_armv8() {
set -ex
cd /work/build
+ build_ccache_wrappers
cmake\
-DANDROID=ON \
-DUSE_CUDA=OFF\
@@ -244,7 +278,7 @@ build_centos7_cpu() {
cd /work/mxnet
export CC="ccache gcc"
export CXX="ccache g++"
-
+ build_ccache_wrappers
make \
DEV=1 \
USE_LAPACK=1 \
@@ -253,10 +287,17 @@ build_centos7_cpu() {
USE_BLAS=openblas \
USE_DIST_KVSTORE=1 \
-j$(nproc)
+ cp lib/libmkldnn.so.0 lib/libmkldnn.so.0.tmp
+ mv lib/libmkldnn.so.0.tmp lib/libmkldnn.so.0
+ cp lib/libmklml_intel.so lib/libmklml_intel.so.tmp
+ mv lib/libmklml_intel.so.tmp lib/libmklml_intel.so
+ cp lib/libiomp5.so lib/libiomp5.so.tmp
+ mv lib/libiomp5.so.tmp lib/libiomp5.so
}
build_amzn_linux_cpu() {
cd /work/build
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
@@ -279,13 +320,12 @@ build_centos7_mkldnn() {
cd /work/mxnet
export CC="ccache gcc"
export CXX="ccache g++"
-
+ build_ccache_wrappers
make \
DEV=1 \
ENABLE_TESTCOVERAGE=1 \
USE_LAPACK=1 \
USE_LAPACK_PATH=/usr/lib64/liblapack.so \
- USE_MKLDNN=1 \
USE_BLAS=openblas \
-j$(nproc)
}
@@ -294,7 +334,7 @@ build_centos7_gpu() {
set -ex
cd /work/mxnet
# unfortunately this build has problems in 3rdparty dependencies with ccache and make
- # build_ccache_wrappers
+ build_ccache_wrappers
make \
DEV=1 \
ENABLE_TESTCOVERAGE=1 \
@@ -307,6 +347,12 @@ build_centos7_gpu() {
USE_DIST_KVSTORE=1 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
-j$(nproc)
+ cp lib/libmkldnn.so.0 lib/libmkldnn.so.0.tmp
+ mv lib/libmkldnn.so.0.tmp lib/libmkldnn.so.0
+ cp lib/libmklml_intel.so lib/libmklml_intel.so.tmp
+ mv lib/libmklml_intel.so.tmp lib/libmklml_intel.so
+ cp lib/libiomp5.so lib/libiomp5.so.tmp
+ mv lib/libiomp5.so.tmp lib/libiomp5.so
}
build_ubuntu_cpu() {
@@ -315,8 +361,9 @@ build_ubuntu_cpu() {
build_ubuntu_cpu_openblas() {
set -ex
- export CC="ccache gcc"
- export CXX="ccache g++"
+ export CC="gcc"
+ export CXX="g++"
+ build_ccache_wrappers
make \
DEV=1 \
ENABLE_TESTCOVERAGE=1 \
@@ -344,6 +391,7 @@ build_ubuntu_cpu_cmake_debug() {
set -ex
pushd .
cd /work/build
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
@@ -365,11 +413,12 @@ build_ubuntu_cpu_cmake_asan() {
pushd .
cd /work/build
+ export CXX=g++-8
+ export CC=gcc-8
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
- -DCMAKE_CXX_COMPILER=/usr/bin/g++-8 \
- -DCMAKE_C_COMPILER=/usr/bin/gcc-8 \
-DUSE_CUDA=OFF \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_OPENMP=OFF \
@@ -391,10 +440,10 @@ build_ubuntu_cpu_cmake_asan() {
build_ubuntu_cpu_clang39() {
set -ex
- export CXX=clang++-3.9
+ export CXX=clang++-3.9
export CC=clang-3.9
- build_ccache_wrappers
- make \
+ build_ccache_wrappers
+ make \
ENABLE_TESTCOVERAGE=1 \
USE_CPP_PACKAGE=1 \
USE_BLAS=openblas \
@@ -429,6 +478,7 @@ build_ubuntu_cpu_clang_tidy() {
pushd .
cd /work/build
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
@@ -458,7 +508,6 @@ build_ubuntu_cpu_clang39_mkldnn() {
ENABLE_TESTCOVERAGE=1 \
USE_CPP_PACKAGE=1 \
USE_BLAS=openblas \
- USE_MKLDNN=1 \
USE_OPENMP=0 \
-j$(nproc)
}
@@ -475,7 +524,6 @@ build_ubuntu_cpu_clang60_mkldnn() {
ENABLE_TESTCOVERAGE=1 \
USE_CPP_PACKAGE=1 \
USE_BLAS=openblas \
- USE_MKLDNN=1 \
USE_OPENMP=1 \
-j$(nproc)
}
@@ -490,7 +538,6 @@ build_ubuntu_cpu_mkldnn() {
ENABLE_TESTCOVERAGE=1 \
USE_CPP_PACKAGE=1 \
USE_BLAS=openblas \
- USE_MKLDNN=1 \
-j$(nproc)
}
@@ -526,6 +573,8 @@ build_ubuntu_gpu_tensorrt() {
mkdir -p build
cd build
cmake \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER}\
-DBUILD_SHARED_LIBS=ON ..\
-G Ninja
@@ -540,7 +589,10 @@ build_ubuntu_gpu_tensorrt() {
cd 3rdparty/onnx-tensorrt/
mkdir -p build
cd build
- cmake ..
+ cmake \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ ..
make -j$(nproc)
export LIBRARY_PATH=`pwd`:$LIBRARY_PATH
popd
@@ -578,7 +630,6 @@ build_ubuntu_gpu_mkldnn() {
ENABLE_TESTCOVERAGE=1 \
USE_CPP_PACKAGE=1 \
USE_BLAS=openblas \
- USE_MKLDNN=1 \
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=1 \
@@ -595,7 +646,6 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
DEV=1 \
ENABLE_TESTCOVERAGE=1 \
USE_BLAS=openblas \
- USE_MKLDNN=1 \
USE_CUDA=1 \
USE_CUDA_PATH=/usr/local/cuda \
USE_CUDNN=0 \
@@ -618,11 +668,16 @@ build_ubuntu_gpu_cuda91_cudnn7() {
USE_DIST_KVSTORE=1 \
CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
-j$(nproc)
+ cp lib/libmkldnn.so.0 lib/libmkldnn.so.0.tmp
+ mv lib/libmkldnn.so.0.tmp lib/libmkldnn.so.0
+ cp lib/libiomp5.so lib/libiomp5.so.tmp
+ mv lib/libiomp5.so.tmp lib/libiomp5.so
}
build_ubuntu_amalgamation() {
set -ex
# Amalgamation can not be run with -j nproc
+ build_ccache_wrappers
make -C amalgamation/ clean
make -C amalgamation/ \
USE_BLAS=openblas \
@@ -632,6 +687,7 @@ build_ubuntu_amalgamation() {
build_ubuntu_amalgamation_min() {
set -ex
# Amalgamation can not be run with -j nproc
+ build_ccache_wrappers
make -C amalgamation/ clean
make -C amalgamation/ \
USE_BLAS=openblas \
@@ -642,14 +698,15 @@ build_ubuntu_amalgamation_min() {
build_ubuntu_gpu_cmake_mkldnn() {
set -ex
cd /work/build
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DENABLE_TESTCOVERAGE=ON \
-DUSE_CUDA=1 \
-DUSE_CUDNN=1 \
-DUSE_MKLML_MKL=1 \
- -DUSE_MKLDNN=1 \
-DCMAKE_BUILD_TYPE=Release \
-DCUDA_ARCH_NAME=Manual \
-DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
@@ -665,9 +722,11 @@ build_ubuntu_gpu_cmake_mkldnn() {
build_ubuntu_gpu_cmake() {
set -ex
cd /work/build
+ build_ccache_wrappers
cmake \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DENABLE_TESTCOVERAGE=ON \
-DUSE_CUDA=1 \
-DUSE_CUDNN=1 \
@@ -681,6 +740,9 @@ build_ubuntu_gpu_cmake() {
/work/mxnet
ninja -v
+ # libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0.
+ cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp
+ mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0
}
build_ubuntu_blc() {
@@ -792,6 +854,9 @@ unittest_ubuntu_cpu_scala() {
unittest_centos7_cpu_scala() {
set -ex
+ mkdir -p /work/mxnet/3rdparty/mkldnn/build/install/lib/
+ cp lib/libmkldnn.so.0 /work/mxnet/3rdparty/mkldnn/build/install/lib/libmkldnn.so
+ cp lib/libmklml_intel.so /work/mxnet/3rdparty/mkldnn/build/install/lib/libmklml_intel.so
cd /work/mxnet
make scalapkg USE_BLAS=openblas USE_DIST_KVSTORE=1 ENABLE_TESTCOVERAGE=1
make scalaunittest USE_BLAS=openblas USE_DIST_KVSTORE=1 ENABLE_TESTCOVERAGE=1
@@ -861,6 +926,7 @@ unittest_ubuntu_cpu_julia06() {
# FIXME
export LD_PRELOAD='/usr/lib/x86_64-linux-gnu/libjemalloc.so'
+ # export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
# use the prebuilt binary from $MXNET_HOME/lib
julia -e 'Pkg.build("MXNet")'
@@ -944,6 +1010,9 @@ integrationtest_ubuntu_cpu_dist_kvstore() {
integrationtest_ubuntu_gpu_scala() {
set -ex
+ mkdir -p /work/mxnet/3rdparty/mkldnn/build/install/lib/
+ cp lib/libmkldnn.so.0 /work/mxnet/3rdparty/mkldnn/build/install/lib/libmkldnn.so
+ cp lib/libmklml_intel.so /work/mxnet/3rdparty/mkldnn/build/install/lib/libmklml_intel.so
make scalapkg USE_OPENCV=1 USE_BLAS=openblas USE_CUDA=1 USE_CUDA_PATH=/usr/local/cuda USE_CUDNN=1 USE_DIST_KVSTORE=1 SCALA_ON_GPU=1 ENABLE_TESTCOVERAGE=1
make scalaintegrationtest USE_OPENCV=1 USE_BLAS=openblas USE_CUDA=1 USE_CUDA_PATH=/usr/local/cuda USE_CUDNN=1 SCALA_TEST_ON_GPU=1 USE_DIST_KVSTORE=1 ENABLE_TESTCOVERAGE=1
}
@@ -1138,7 +1207,7 @@ nightly_straight_dope_python3_multi_gpu_tests() {
nightly_tutorial_test_ubuntu_python3_gpu() {
set -ex
cd /work/mxnet/docs
- export BUILD_VER=tutorial
+ export BUILD_VER=tutorial
export MXNET_DOCS_BUILD_MXNET=0
make html
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
@@ -1168,7 +1237,7 @@ deploy_docs() {
set -ex
pushd .
- make docs
+ make docs SPHINXOPTS=-W
popd
}
@@ -1187,6 +1256,7 @@ deploy_jl_docs() {
# FIXME
export LD_PRELOAD='/usr/lib/x86_64-linux-gnu/libjemalloc.so'
+ # export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
# use the prebuilt binary from $MXNET_HOME/lib
julia -e 'Pkg.build("MXNet")'
@@ -1224,5 +1294,3 @@ EOF
declare -F | cut -d' ' -f3
echo
fi
-
-
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 74bde1eee211..94cc81a444a6 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -23,22 +23,22 @@
utils = load('ci/Jenkinsfile_utils.groovy')
// mxnet libraries
-mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
+mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, lib/libmklml_intel.so, lib/libmkldnn.so.0, lib/libiomp5.so'
// Python wheels
mx_pip = 'build/*.whl'
// for scala build, need to pass extra libs when run with dist_kvstore
-mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a'
+mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, lib/libmklml_intel.so, lib/libmkldnn.so.0, lib/libiomp5.so'
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
-mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so'
+mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
-mx_tensorrt_lib = 'lib/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
-mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*'
-mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*'
+mx_tensorrt_lib = 'lib/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so, lib/libmklml_intel.so, lib/libmkldnn.so.0, lib/libiomp5.so'
+mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*, lib/libmklml_intel.so, lib/libmkldnn.so.0, lib/libiomp5.so'
+mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*, lib/libmklml_intel.so, lib/libmkldnn.so.0, lib/libiomp5.so'
// Python unittest for CPU
// Python 2
diff --git a/ci/jenkins/Jenkinsfile_edge b/ci/jenkins/Jenkinsfile_edge
index 275a0c96de94..c101ba102386 100644
--- a/ci/jenkins/Jenkinsfile_edge
+++ b/ci/jenkins/Jenkinsfile_edge
@@ -34,7 +34,7 @@ utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_
utils.main_wrapper(
core_logic: {
utils.parallel_stage('Build', [
-// custom_steps.compile_armv8_jetson_gpu(),
+ custom_steps.compile_armv8_jetson_gpu(),
custom_steps.compile_armv7_cpu(),
custom_steps.compile_armv6_cpu(),
custom_steps.compile_armv8_cpu(),
diff --git a/cpp-package/example/README.md b/cpp-package/example/README.md
index c7223e94c920..c2329330b6be 100644
--- a/cpp-package/example/README.md
+++ b/cpp-package/example/README.md
@@ -2,7 +2,8 @@
## Building C++ examples
-The examples are built while building the MXNet library and cpp-package from source . However, they can be built manually as follows
+The examples in this folder demonstrate the **training** workflow. The **inference workflow** related examples can be found in the [inference](inference) folder.
+The examples in this folder are built while building the MXNet library and cpp-package from source . However, they can be built manually as follows
From cpp-package/examples directory
@@ -18,7 +19,7 @@ The examples that are built to be run on GPU may not work on the non-GPU machine
The makefile will also download the necessary data files and store in a data folder. (The download will take couple of minutes, but will be done only once on a fresh installation.)
-## Examples
+## Examples demonstrating training workflow
This directory contains following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS specific environment variable viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS. For example `export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/home/ubuntu/incubator-mxnet/lib` on ubuntu using gpu.
diff --git a/cpp-package/example/inference/Makefile b/cpp-package/example/inference/Makefile
new file mode 100644
index 000000000000..5efe6cfb68e5
--- /dev/null
+++ b/cpp-package/example/inference/Makefile
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+CPPEX_SRC = $(wildcard *.cpp)
+CPPEX_EXE = $(patsubst %.cpp, %, $(CPPEX_SRC))
+OPENCV_CFLAGS=`pkg-config --cflags opencv`
+OPENCV_LDFLAGS=`pkg-config --libs opencv`
+
+CXX=g++
+
+
+CFLAGS=$(COMMFLAGS) -I../../../3rdparty/tvm/nnvm/include -I../../../3rdparty/dmlc-core/include -I ../../include -I ../../../include -Wall -O3 -msse3 -funroll-loops -Wno-unused-parameter -Wno-unknown-pragmas
+CPPEX_EXTRA_LDFLAGS := -L../../../lib -lmxnet $(OPENCV_LDFLAGS)
+
+all: $(CPPEX_EXE)
+
+debug: CPPEX_CFLAGS += -DDEBUG -g
+debug: all
+
+
+$(CPPEX_EXE):% : %.cpp
+ $(CXX) -std=c++0x $(CFLAGS) $(CPPEX_CFLAGS) -o $@ $(filter %.cpp %.a, $^) $(CPPEX_EXTRA_LDFLAGS)
+
+clean:
+ rm -f $(CPPEX_EXE)
diff --git a/cpp-package/example/inference/README.md b/cpp-package/example/inference/README.md
new file mode 100644
index 000000000000..79831b40b6bd
--- /dev/null
+++ b/cpp-package/example/inference/README.md
@@ -0,0 +1,41 @@
+# MXNet C++ Package Inference Workflow Examples
+
+## Building C++ Inference examples
+
+The examples in this folder demonstrate the **inference** workflow.
+To build examples use following commands:
+
+- Release: **make all**
+- Debug: **make debug all**
+
+
+## Examples demonstrating inference workflow
+
+This directory contains following examples. In order to run the examples, ensure that the path to the MXNet shared library is added to the OS specific environment variable viz. **LD\_LIBRARY\_PATH** for Linux, Mac and Ubuntu OS and **PATH** for Windows OS.
+
+### [inception_inference.cpp](inception_inference.cpp)
+
+This example demonstrates image classification workflow with pre-trained models using MXNet C++ API. The command line parameters the example can accept are as shown below:
+
+```
+./inception_inference --help
+Usage:
+inception_inference --symbol <model symbol file in json format>
+                    --params <model params file>
+                    --image <path to the image to be classified>
+```
+
+The script [unit_test_inception_inference.sh](unit_test_inception_inference.sh) downloads the pre-trained **Inception** model and a test image. The users can invoke this script as follows:
+
+```
+./unit_test_inception_inference.sh
+```
diff --git a/cpp-package/example/inference/inception_inference.cpp b/cpp-package/example/inference/inception_inference.cpp
new file mode 100644
index 000000000000..7005e745b2f4
--- /dev/null
+++ b/cpp-package/example/inference/inception_inference.cpp
@@ -0,0 +1,446 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * This example demonstrates image classification workflow with pre-trained models using MXNet C++ API.
+ * The example performs following tasks.
+ * 1. Load the pre-trained model.
+ * 2. Load the parameters of pre-trained model.
+ * 3. Load the image to be classified in to NDArray.
+ * 4. Normalize the image using the mean of images that were used for training.
+ * 5. Run the forward pass and predict the input image.
+ */
+
+#include
+#include
+#include
+#include