diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16d365355ceb..d229cb0847d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -234,6 +234,7 @@ include_directories("include")
 include_directories("mshadow")
 include_directories("3rdparty/cub")
 include_directories("nnvm/include")
+include_directories("nnvm/tvm/include")
 include_directories("dmlc-core/include")
 include_directories("dlpack/include")
 
@@ -696,4 +697,3 @@ endif()
 set(LINT_DIRS "include src plugin cpp-package tests")
 set(EXCLUDE_PATH "src/operator/contrib/ctc_include")
 add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/dmlc-core/cmake/lint.cmake)
-
diff --git a/Jenkinsfile b/Jenkinsfile
index c23bbbfe5a50..78af1cf021d4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -38,12 +38,12 @@ def init_git() {
   deleteDir()
   retry(5) {
     try {
-      // Make sure wait long enough for api.github.com request quota. Important: Don't increase the amount of 
+      // Make sure wait long enough for api.github.com request quota. Important: Don't increase the amount of
       // retries as this will increase the amount of requests and worsen the throttling
       timeout(time: 15, unit: 'MINUTES') {
         checkout scm
-        sh 'git submodule update --init'
-        sh 'git clean -d -f'        
+        sh 'git submodule update --init --recursive'
+        sh 'git clean -d -f'
       }
     } catch (exc) {
       deleteDir()
@@ -61,8 +61,8 @@ def init_git_win() {
       // retries as this will increase the amount of requests and worsen the throttling
       timeout(time: 15, unit: 'MINUTES') {
         checkout scm
-        bat 'git submodule update --init'
-        bat 'git clean -d -f'        
+        bat 'git submodule update --init --recursive'
+        bat 'git clean -d -f'
       }
     } catch (exc) {
       deleteDir()
@@ -332,6 +332,7 @@ try {
           make('build_cuda', flag)
           pack_lib('gpu')
           stash includes: 'build/cpp-package/example/test_score', name: 'cpp_test_score'
+          stash includes: 'build/cpp-package/example/test_optimizer', name: 'cpp_test_optimizer'
         }
       }
     },
@@ -676,6 +677,7 @@ try {
           init_git()
           unpack_lib('gpu')
           unstash 'cpp_test_score'
+          unstash 'cpp_test_optimizer'
           timeout(time: max_time, unit: 'MINUTES') {
             sh "${docker_run} gpu --dockerbinary nvidia-docker cpp-package/tests/ci_test.sh"
           }
diff --git a/Makefile b/Makefile
index cb3e63ba13b0..5d81c7fbb160 100644
--- a/Makefile
+++ b/Makefile
@@ -91,7 +91,7 @@ ifeq ($(DEBUG), 1)
 else
 	CFLAGS += -O3 -DNDEBUG=1
 endif
-CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -Iinclude $(MSHADOW_CFLAGS)
+CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(NNVM_PATH)/tvm/include -Iinclude $(MSHADOW_CFLAGS)
 LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
 ifeq ($(DEBUG), 1)
 	NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
@@ -356,7 +356,7 @@ ifeq ($(USE_CUDA), 1)
 		LDFLAGS += -lcuda -lnvrtc
 		CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
 	endif
-	# Make sure to add stubs as fallback in order to be able to build 
+	# Make sure to add stubs as fallback in order to be able to build
 	# without full CUDA install (especially if run without nvidia-docker)
 	LDFLAGS += -L/usr/local/cuda/lib64/stubs
 	SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-gpu
diff --git a/cpp-package/example/test_optimizer.cpp b/cpp-package/example/test_optimizer.cpp
new file mode 100644
index 000000000000..bf465b786988
--- /dev/null
+++ b/cpp-package/example/test_optimizer.cpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "mxnet-cpp/MxNetCpp.h"
+
+using namespace std;
+using namespace mxnet::cpp;
+
+int main(int argc, char** argv) {
+  // Confirm >1 optimizers can be created w/o error; fail if either lookup is null
+  Optimizer* sgd = OptimizerRegistry::Find("sgd");
+  Optimizer* adam = OptimizerRegistry::Find("adam");
+  int ret = (sgd == nullptr || adam == nullptr) ? 1 : 0;
+
+  MXNotifyShutdown();
+  return ret;
+}
diff --git a/cpp-package/include/mxnet-cpp/optimizer.hpp b/cpp-package/include/mxnet-cpp/optimizer.hpp
index e3d47d1161c6..26fd00f3a162 100644
--- a/cpp-package/include/mxnet-cpp/optimizer.hpp
+++ b/cpp-package/include/mxnet-cpp/optimizer.hpp
@@ -125,13 +125,16 @@ inline float Optimizer::GetWD_(int index) {
 }
 
 inline Optimizer* OptimizerRegistry::Find(const std::string& name) {
-  MXNETCPP_REGISTER_OPTIMIZER(sgd, SGDOptimizer);
-  MXNETCPP_REGISTER_OPTIMIZER(ccsgd, SGDOptimizer);  // For backward compatibility
-  MXNETCPP_REGISTER_OPTIMIZER(rmsprop, RMSPropOptimizer);
-  MXNETCPP_REGISTER_OPTIMIZER(adam, AdamOptimizer);
-  MXNETCPP_REGISTER_OPTIMIZER(adagrad, AdaGradOptimizer);
-  MXNETCPP_REGISTER_OPTIMIZER(adadelta, AdaDeltaOptimizer);
-  MXNETCPP_REGISTER_OPTIMIZER(signum, SignumOptimizer);
+  if (cmap().empty()) {
+    // Optimizers should only be registered once
+    MXNETCPP_REGISTER_OPTIMIZER(sgd, SGDOptimizer);
+    MXNETCPP_REGISTER_OPTIMIZER(ccsgd, SGDOptimizer);  // For backward compatibility
+    MXNETCPP_REGISTER_OPTIMIZER(rmsprop, RMSPropOptimizer);
+    MXNETCPP_REGISTER_OPTIMIZER(adam, AdamOptimizer);
+    MXNETCPP_REGISTER_OPTIMIZER(adagrad, AdaGradOptimizer);
+    MXNETCPP_REGISTER_OPTIMIZER(adadelta, AdaDeltaOptimizer);
+    MXNETCPP_REGISTER_OPTIMIZER(signum, SignumOptimizer);
+  }
   auto it = cmap().find(name);
   if (it == cmap().end())
     return nullptr;
diff --git a/cpp-package/tests/ci_test.sh b/cpp-package/tests/ci_test.sh
index 3b2af35bf1be..2042529ace01 100755
--- a/cpp-package/tests/ci_test.sh
+++ b/cpp-package/tests/ci_test.sh
@@ -22,6 +22,9 @@ export LD_LIBRARY_PATH=$(readlink -f ../../lib):$LD_LIBRARY_PATH
 echo $LD_LIBRARY_PATH
 ls -l ../../lib/
 
+cp ../../build/cpp-package/example/test_optimizer .
+./test_optimizer
+
 cp ../../build/cpp-package/example/test_score .
 ./get_mnist.sh
 ./test_score 0.93
diff --git a/dlpack b/dlpack
index a6e09b58dc00..10892ac964f1 160000
--- a/dlpack
+++ b/dlpack
@@ -1 +1 @@
-Subproject commit a6e09b58dc00ee0065f5b7879800e646fbb01d1e
+Subproject commit 10892ac964f1af7c81aae145cd3fab78bbccd297
diff --git a/docs/_static/mxnet-theme/index.html b/docs/_static/mxnet-theme/index.html
index d22e2541903c..3b48832a03cd 100644
--- a/docs/_static/mxnet-theme/index.html
+++ b/docs/_static/mxnet-theme/index.html
@@ -21,9 +21,9 @@
   <div class="container">
     <div class="row">
       <div class="col-lg-4 col-sm-12">
-        <h3>Apache MXNet 1.0 Released</h3>
-        <p>We're excited to announce the release of MXNet 1.0! Check out the release notes for latest updates.</p>
-        <a href="https://github.com/apache/incubator-mxnet/releases/tag/1.0.0">Learn More</a>
+        <h3>Apache MXNet 1.1.0 Released</h3>
+        <p>We're excited to announce the release of MXNet 1.1.0! Check out the release notes for latest updates.</p>
+        <a href="https://github.com/apache/incubator-mxnet/releases/tag/1.1.0">Learn More</a>
       </div>
       <div class="col-lg-4 col-sm-12">
         <h3>MXNet Model Server</h3>
diff --git a/docs/build_version_doc/AddVersion.py b/docs/build_version_doc/AddVersion.py
index 2c9ee22bf42e..c4d088a4b0f4 100755
--- a/docs/build_version_doc/AddVersion.py
+++ b/docs/build_version_doc/AddVersion.py
@@ -57,6 +57,9 @@
         for name in files:
             if not name.endswith('.html'):
                 continue
+            if 'install' in path:
+                print("Skipping this path: {}".format(path))
+                continue
             with open(os.path.join(path, name), 'r') as html_file:
                 content = bs(html_file, 'html.parser')
             navbar = content.find(id="main-nav")
@@ -74,7 +77,7 @@
                 outstr = str(content).replace('&lt;', '<').replace('&gt;', '>')
             # Fix link
             if args.current_version == tag_list[0]:
-                print("Fixing" + os.path.join(path, name))
+                print("Fixing " + os.path.join(path, name))
                 outstr = outstr.replace('https://mxnet.io', 'https://mxnet.incubator.apache.org')
                 outstr = outstr.replace('http://mxnet.io', 'https://mxnet.incubator.apache.org')
             else:
diff --git a/docs/build_version_doc/Dockerfile b/docs/build_version_doc/Dockerfile
new file mode 100755
index 000000000000..204320e32611
--- /dev/null
+++ b/docs/build_version_doc/Dockerfile
@@ -0,0 +1,44 @@
+FROM ubuntu:16.04
+LABEL maintainer="markhama@amazon.com"
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+    apt-transport-https \
+    build-essential \
+    ca-certificates \
+    curl \
+    doxygen \
+    git \
+    libatlas-base-dev \
+    liblapack-dev \
+    libopenblas-dev \
+    libopencv-dev \
+    pandoc \
+    python-numpy \
+    python-pip \
+    software-properties-common \
+    unzip \
+    wget
+
+# Setup Scala
+RUN echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list
+RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
+RUN apt-get update && apt-get install -y \
+    sbt \
+    scala 
+
+# Run pip as a module so upgrading it cannot break the distro launcher; pip 21+ dropped Python 2 (assumed interpreter here)
+RUN python -m pip install --upgrade "pip<21" && python -m pip install \
+    beautifulsoup4 \
+    breathe \
+    CommonMark==0.5.4 \
+    h5py \
+    mock==1.0.1 \
+    pypandoc \
+    recommonmark==0.4.0 \
+    sphinx==1.5.6
+
+COPY *.sh /
+COPY *.py /
+RUN /build_all_version.sh "1.1.0 1.0.0 0.12.1 0.12.0 0.11.0 master"
+RUN /update_all_version.sh "1.1.0 1.0.0 0.12.1 0.12.0 0.11.0 master" 1.1.0 http://mxnet.incubator.apache.org/
diff --git a/docs/build_version_doc/build_all_version.sh b/docs/build_version_doc/build_all_version.sh
index 6a37815fd50e..4db9326a4464 100755
--- a/docs/build_version_doc/build_all_version.sh
+++ b/docs/build_version_doc/build_all_version.sh
@@ -19,67 +19,65 @@
 
 # This script is for locally building website for all versions
 # Built files are stored in $built
-# Version numbers are stored in $tag_list.
-# Version numbers are ordered from latest to old and final one is master.
+
+# Takes one argument:
+# * tag list - space delimited list of Github tags; Example: "1.1.0 1.0.0 master"
+# Example Usage:
+# ./build_all_version.sh "1.1.0 1.0.0 master"
+
 set -e
 set -x
 
-tag_list="1.1.0 1.0.0 0.12.1 0.12.0 0.11.0 master"
+if [ -z "$1" ]
+  then
+    echo "Please provide a list of version tags you wish to run."
+    exit 1
+  else
+    tag_list="$1"
+    echo "Using these tags: $1"
+fi
 
 mxnet_url="https://github.com/apache/incubator-mxnet.git"
 mxnet_folder="apache_mxnet"
 built="VersionedWeb"
-mkdir $built
-mkdir "$built/versions"
 
-git clone $mxnet_url $mxnet_folder --recursive
-cd "$mxnet_folder/docs"
-tag_file="tag_list.txt"
+if [ ! -d "$mxnet_folder" ]; then
+  mkdir $mxnet_folder
+  git clone $mxnet_url $mxnet_folder --recursive
+fi
 
-# Write all version numbers into $tag_file
-for tag in $tag_list; do
-    if [ $tag != 'master' ]
-    then
-        echo "$tag" >> "$tag_file"
-    fi
-done
+if [ ! -d "$built" ]; then
+  mkdir $built
+  mkdir "$built/versions"
+fi
 
 # Build all versions and use latest version(First version number in $tag_list) as landing page.
-version_num=0
 for tag in $tag_list; do
+    cd "$mxnet_folder"
+    git fetch
     if [ $tag == 'master' ]
-    then
-        git checkout master
-    else
-        git checkout "tags/$tag"
+        then
+            git checkout master
+            git pull
+        else
+            git checkout "tags/$tag"
+    fi
+    if [ $tag == '0.11.0' ]
+      then
+          git checkout master -- docs/mxdoc.py
     fi
-
     git submodule update || exit 1
-    cd ..
     make clean
     cd docs
     make clean
-    make html USE_OPENMP=0 || exit 1
-    python build_version_doc/AddVersion.py --file_path "_build/html/" --current_version "$tag" || exit 1
-
-    if [ $tag != 'master' ]
-    then 
-        python build_version_doc/AddPackageLink.py --file_path "_build/html/get_started/install.html" \
-                                                   --current_version "$tag" || exit 1
-    fi
-
-    if [ $version_num == 0 ]
-    then
-        cp -a _build/html/. "../../$built"
-    else
-        file_loc="../../$built/versions/$tag"
-        mkdir "$file_loc"
-        cp -a _build/html/. "$file_loc"
+    make html USE_OPENMP=1 || exit 1
+    cd ../../
+    file_loc="$built/versions/$tag"
+    if [ -d "$file_loc" ] ; then
+        rm -rf "$file_loc"
     fi
-
-    ((++version_num))
+    mkdir "$file_loc"
+    cp -a "$mxnet_folder/docs/_build/html/." "$file_loc"
 done
-    
-mv "$tag_file" "../../$built/tag.txt"
-cd ../..
-rm -rf "$mxnet_folder"
+
+echo "Now you may want to run update_all_version.sh to create the production layout with the versions dropdown and other per-version corrections."
diff --git a/docs/build_version_doc/build_doc.sh b/docs/build_version_doc/build_doc.sh
index eefc81e362e8..427f40c592a0 100755
--- a/docs/build_version_doc/build_doc.sh
+++ b/docs/build_version_doc/build_doc.sh
@@ -19,10 +19,22 @@
 set -e
 set -x
 
+# This script is run on a nightly basis. Refer to Job: http://jenkins.mxnet-ci.amazon-ml.com/job/incubator-mxnet-build-site/
+# Job should pass in parameters:
+# web_url=https://github.com/apache/incubator-mxnet-site
+# web_branch=asf-site
+# release_branch=v1.1.0 (example). This needs to come from the job config
+
+# First parameter sent by job configuration: https://github.com/apache/incubator-mxnet-site
 web_url="$1"
+
+# Second parameter sent by job configuration: asf-site
+web_branch="$2"
+
 web_folder="VersionedWeb"
+
 local_build="latest"
-web_branch="$2"
+
 git clone $web_url $web_folder
 cd $web_folder
 git checkout $web_branch
@@ -37,28 +49,64 @@ while read -r line
 do
     tag_list+=("$line")
 done < "$tag_list_file"
+
+# This is the first tag found in tag.txt
 latest_tag=${tag_list[0]}
-echo "latest_tag is: $latest_tag"
+echo "++++ LATEST TAG found in tag.txt file is : $latest_tag ++++"
+
 commit_id=$(git rev-parse HEAD)
+
+# Find the current TAG in GIT
 curr_tag=${TAG}
 curr_tag=${curr_tag:5}
-echo "Current tag is $curr_tag"
+
+echo "++++ CURRENT TAG IN GIT is $curr_tag ++++"
+
+# If current tag in git is newer than latest tag found in tag.txt
 if [[ "$curr_tag" != 'master' ]] && [ $curr_tag != $latest_tag ]
 then
+    echo "++++ Found a git TAG $curr_tag newer than mxnet repo tag $latest_tag , we need to build a new release ++++"
+    echo "assigning curr_tag to latest_tag"
     latest_tag=$curr_tag
 fi
 
 # Build new released tag
 if [ $latest_tag != ${tag_list[0]} ]
 then
-    echo "Building new tag"
+    echo " ******************************************  " 
+    echo " Building new release on: $latest_tag "
+    echo " ******************************************  " 
     git submodule update
+
+    # checkout the latest release tag.
+    echo "++++ Checking out and building new tag $latest_tag ++++"
+    git checkout tags/$latest_tag
     make docs || exit 1
-    echo -e "$latest_tag\n$(cat $tag_list_file)" > "$tag_list_file"
-    cat $tag_list_file
+    
     tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddVersion.py --file_path "docs/_build/html/" --current_version "$latest_tag"
     tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddPackageLink.py \
                                           --file_path "docs/_build/html/install/index.html" --current_version "$latest_tag"
+
+    # Update the tag_list (tag.txt).
+    ###### content of tag.txt########
+    # <latest_tag_goes_here>
+    # 1.0.0
+    # 0.12.1
+    # 0.12.0
+    # 0.11.0
+    echo "++++ Adding $latest_tag to the top of the $tag_list_file ++++"
+    echo -e "$latest_tag\n$(cat $tag_list_file)" > "$tag_list_file"
+    cat $tag_list_file
+
+    # The following block does the following:
+    # a. copies the static html that was built from new tag to a local sandbox folder.
+    # b. copies the $tag_list_file into local sandbox tag.txt
+    # c. removes .git in VersionedWeb folder
+    # d. copies VersionedWeb/versions to local sandbox versions folder.
+    # e. makes a new directory with the previous TAG version. N-1 version name (example current: 1.1.0, Previous: 1.0.0)
+    # f. Copies ReadMe.md to the local sandbox build.
+    # g. removes the content of VersionedWeb completely.
+    # h. Adds new content from local sandbox build to VersionedWeb.
     cp -a "docs/_build/html/." "$local_build"
     cp $tag_list_file "$local_build/tag.txt"
     rm -rf "$web_folder/.git"
@@ -69,26 +117,32 @@ then
     rm -rf "$local_build/versions/${tag_list[0]}/versions"
     rm -rf "$web_folder/*"
     cp -a "$local_build/." "$web_folder"
-fi
+  
+    echo " ******************************************  " 
+    echo " Successfully built new release $latest_tag "
+    echo " ******************************************  " 
+else
+    # Build latest master
+    echo " ********** Building Master ************ "
 
-# Build latest master
-git checkout master
-git checkout -- .
-git submodule update
-echo "Building master"
-make docs || exit 1
+    make docs || exit 1
 
-rm -rfv $web_folder/versions/master/*
-cp -a "docs/_build/html/." "$web_folder/versions/master"
-tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddVersion.py --file_path "$web_folder/versions/master"
+    rm -rfv $web_folder/versions/master/*
+    cp -a "docs/_build/html/." "$web_folder/versions/master"
+    tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddVersion.py --file_path "$web_folder/versions/master"
+fi
 
 # Update version list for all previous version website
 if [ $latest_tag != ${tag_list[0]} ]
 then
     total=${#tag_list[*]}
-    for (( i=0; i<=$(( $total -1 )); i++ ))
+    for (( i=0; i<=$(( $total - 1 )); i++ ))
+    
     do
         tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddVersion.py --file_path "$web_folder/versions/${tag_list[$i]}" \
                                               --current_version "${tag_list[$i]}"
     done
+
+    # Update master version dropdown
+    tests/ci_build/ci_build.sh doc python docs/build_version_doc/AddVersion.py --file_path "$web_folder/versions/master" 
 fi
diff --git a/docs/build_version_doc/build_site_tag.sh b/docs/build_version_doc/build_site_tag.sh
new file mode 100755
index 000000000000..d453e0cc9734
--- /dev/null
+++ b/docs/build_version_doc/build_site_tag.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# How this script works:
+# 1. Receive tag list
+#    Looks like: tag_list="1.1.0 1.0.0 0.12.1 0.12.0 0.11.0 master"
+# 2. Receive default tag (for main website view)
+# 3. Receive root URL
+# 4. Call build and then update scripts
+
+# Take the tag list from the first argument, else from the TAG_LIST env var
+if [ -z "$1" ]
+  then
+    echo "No tag list supplied... trying environment variable TAG_LIST"
+fi
+tag_list="${1:-$TAG_LIST}"
+
+if [ -z "$tag_list" ]
+  then
+    echo "No tags defined"
+    exit 1
+  else
+    echo "Using these tags: $tag_list"
+fi
+
+if [ -z "$2" ]
+  then
+    echo "Please pick a version to use as a default for the website. Ex: 1.1.0"
+    exit 1
+  else
+    tag_default=$2
+fi
+
+if [ -z "$3" ]
+  then
+    echo "Please provide the root url for the site. Ex: http://mxnet.incubator.apache.org/"
+    exit 1
+  else
+    root_url=$3
+fi
+
+# Pass params to build and update scripts
+for tag in $tag_list; do
+  ./build_all_version.sh $tag || exit 1
+done
+
+./update_all_version.sh "$tag_list" $tag_default $root_url || exit 1
+
diff --git a/docs/build_version_doc/setup_docs_ubuntu.sh b/docs/build_version_doc/setup_docs_ubuntu.sh
new file mode 100755
index 000000000000..98b9259c6966
--- /dev/null
+++ b/docs/build_version_doc/setup_docs_ubuntu.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# If you need to build <= v0.12.0 then use a Python 2 environment
+# mxdoc.py - a sphinx extension, was not Python 3 compatible in the old versions
+# source activate mxnet_p27
+
+# Install dependencies
+sudo apt-get update
+sudo apt-get install -y \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    doxygen \
+    software-properties-common
+
+pip install --user \
+    beautifulsoup4 \
+    breathe \
+    CommonMark==0.5.4 \
+    h5py \
+    mock==1.0.1 \
+    pypandoc \
+    recommonmark==0.4.0 \
+    sphinx==1.5.6
+
+# Recommonmark/Sphinx errors: https://github.com/sphinx-doc/sphinx/issues/3800
+
+
+# Setup scala
+echo "deb https://dl.bintray.com/sbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
+sudo apt-get update
+sudo apt-get install -y \
+  sbt \
+  scala
+
+# Cleanup
+sudo apt autoremove -y
+
+# Make docs using the manual way
+# cd .. && make html USE_OPENMP=0
+# using the docker way
+# sudo make docs
+
diff --git a/docs/build_version_doc/update_all_version.sh b/docs/build_version_doc/update_all_version.sh
new file mode 100755
index 000000000000..e79b97117c5a
--- /dev/null
+++ b/docs/build_version_doc/update_all_version.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script will update the html content from building 
+# different tags.
+# It assumes you have already run build_all_version.sh for 
+# the tags you want to update.
+
+# Takes three arguments:
+# * tag list - space delimited list of Github tags; Example: "1.1.0 1.0.0 master"
+# * default tag - which version should the site default to; Example: 1.0.0
+# * root URL - for the versions dropdown to change to production or dev server; Example: http://mxnet.incubator.apache.org/
+
+# Example Usage:
+# ./update_all_version.sh "1.1.0 1.0.0 master" 1.0.0 http://mxnet.incubator.apache.org/
+
+set -e
+set -x
+
+if [ -z "$1" ]
+  then    
+    echo "Please provide a list of version tags you wish to run. Ex : \"1.1.0 1.0.0 master\""
+    exit 1
+  else
+    tag_list=$1
+fi    
+
+if [ -z "$2" ]
+  then    
+    echo "Please pick a version to use as a default for the website. Ex: 1.1.0"
+    exit 1
+  else
+    tag_default=$2
+fi    
+
+if [ -z "$3" ]
+  then
+    echo "Please provide the root url for the site. Ex: http://mxnet.incubator.apache.org/"
+    exit 1
+  else
+    root_url=$3
+fi
+
+mxnet_folder="apache_mxnet"
+built="VersionedWeb"
+tag_file="tag_list.txt"
+
+if [ -f "$tag_file" ]; then
+  rm $tag_file
+fi
+
+# Write all version numbers into $tag_file for AddVersion.py to use later
+# Master is added by that script by default
+for tag in $tag_list; do
+    if [ $tag != 'master' ]
+    then
+        echo "$tag" >> "$tag_file"
+    fi
+done
+
+# Update the specified tags with the Versions dropdown
+for tag in $tag_list; do
+    # This Python script is expecting the tag_list.txt and it will use that as the entries to populate
+    python AddVersion.py --root_url "$root_url" --file_path "$built/versions/$tag" --current_version "$tag" || exit 1
+
+    if [ $tag != 'master' ]
+    then 
+        python AddPackageLink.py --file_path "$built/versions/master/install/index.html" \
+                                                   --current_version "$tag" || exit 1
+    fi
+
+    if [ $tag == $tag_default ]
+    then
+        cp -a "$built/versions/$tag/." "$built"
+    else
+        file_loc="$built/versions/$tag"
+        #rm -rf "$file_loc"
+        #mkdir "$file_loc"
+        #cp -a $mxnet_folder/docs/_build/html/. "$file_loc"
+    fi
+done
+    
+echo "The output of this process can be found in the VersionedWeb folder."
+
diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py
index 0e0cd521f28d..9412b6f9371b 100755
--- a/example/image-classification/common/fit.py
+++ b/example/image-classification/common/fit.py
@@ -237,6 +237,9 @@ def fit(args, network, data_loader, **kwargs):
         if args.network == 'alexnet':
             # AlexNet will not converge using Xavier
             initializer = mx.init.Normal()
+        elif 'vgg' in args.network:
+            # VGG will not tend to converge using Xavier-Gaussian
+            initializer = mx.init.Xavier()
         else:
             initializer = mx.init.Xavier(
                 rnd_type='gaussian', factor_type="in", magnitude=2)
diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
index 168ddcca24b7..59c1eacb2c58 100755
--- a/include/mxnet/tensor_blob.h
+++ b/include/mxnet/tensor_blob.h
@@ -36,8 +36,18 @@
 #include <utility>
 #include <algorithm>
 #include "./base.h"
+
 namespace mxnet {
 
+// redefine DLPack enumeration to be backward compatible.
+constexpr const int kCPU = kDLCPU;
+constexpr const int kGPU = kDLGPU;
+// Extension type code for MXNet NDArray under TVM functions.
+// NNVM currently reserves type codes 16 to 19 from TVM;
+// 16, 17 and 18 are already used by the NNVM compiler,
+// so code 19 is picked for MXNet NDArray.
+constexpr const int kTVMNDArrayTypeCode = 19;
+
 /* Forward declaration for friend declaration in TBlob */
 class NDArray;
 
diff --git a/nnvm b/nnvm
index 7a052d678455..c342da72271c 160000
--- a/nnvm
+++ b/nnvm
@@ -1 +1 @@
-Subproject commit 7a052d678455f1c96538c1cc5a25f11115363558
+Subproject commit c342da72271c85e477480323f1d91997c6101ac0
diff --git a/python/mxnet/contrib/__init__.py b/python/mxnet/contrib/__init__.py
index 21c77719b70b..ad1010443f9e 100644
--- a/python/mxnet/contrib/__init__.py
+++ b/python/mxnet/contrib/__init__.py
@@ -28,3 +28,4 @@
 from . import tensorboard
 
 from . import text
+from . import onnx
diff --git a/python/mxnet/contrib/onnx/__init__.py b/python/mxnet/contrib/onnx/__init__.py
new file mode 100644
index 000000000000..e5402002bd80
--- /dev/null
+++ b/python/mxnet/contrib/onnx/__init__.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from . import _import
diff --git a/python/mxnet/contrib/onnx/_import/__init__.py b/python/mxnet/contrib/onnx/_import/__init__.py
new file mode 100644
index 000000000000..748c1df19a54
--- /dev/null
+++ b/python/mxnet/contrib/onnx/_import/__init__.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+"""import function"""
+from .import_onnx import GraphProto
+
+def import_model(model_file):
+    """Imports the supplied ONNX model file into MXNet symbol and parameters.
+
+    Parameters
+    ----------
+    model_file : ONNX model file name
+
+    Returns
+    -------
+    sym : mx.symbol
+        Compatible mxnet symbol
+
+    params : dict of str to mx.ndarray
+        Dict of converted parameters stored in mx.ndarray format
+    """
+    # Imported lazily so that importing mxnet.contrib does not require onnx
+    try:
+        import onnx
+    except ImportError:
+        raise ImportError("onnx must be installed to use import_model")
+    sym, params = GraphProto().from_onnx(onnx.load(model_file).graph)  # load protobuf, convert graph
+    return sym, params
diff --git a/python/mxnet/contrib/onnx/_import/common.py b/python/mxnet/contrib/onnx/_import/common.py
new file mode 100644
index 000000000000..9154f009ffdd
--- /dev/null
+++ b/python/mxnet/contrib/onnx/_import/common.py
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=invalid-name,no-self-use,too-many-branches,too-few-public-methods,too-many-arguments
+"""Shared functions and classes for frontends."""
+from __future__ import absolute_import as _abs
+from ....base import string_types
+
+class AttributeConverter(object):
+    """Common attribute converter. An AttributeConverter instance is a callable:
+    ```
+    attr_converter = AttributeConverter(op_name, transforms={'a':'b', 'c':('d', 1)})
+    new_op_name, new_attr = attr_converter(attrs)
+    ```
+
+    Parameters
+    ----------
+    op_name : str or callable
+        If set as str, returned operator name is the str.
+        If set as callable, returned operator is the str returned by calling:
+        `op_name = func(attr)`
+    transforms : dict of `new_name, or (new_name, default_value, transform function)`
+        If only a new_name is provided, it's like renaming the attribute name.
+        If default_value is provided, then the attribute is considered as optional.
+        If transform function is provided, the original attribute value is handled
+        by transform function.
+    excludes : list
+        A list of excluded attributes that should `NOT` appear.
+        Raise NotImplementedError if occurred.
+    disables : list
+        A list of attributes that is disabled in mxnet. Warned about and dropped.
+    ignores : list
+        A list of attributes that is ignored in mxnet. Silent.
+    extras : dict
+        A series of additional attributes should be added anyway to the returned
+        attribute dict.
+    custom_check : tuple of (callable, str)
+        The callable takes the attribute dict and returns True/False; the str is
+        the message of the RuntimeError raised when the check fails.
+    """
+    def __init__(self, op_name, transforms=None,
+                 excludes=None, disables=None, ignores=None,
+                 extras=None, custom_check=None):
+        self._op_name = op_name
+        self._transforms = transforms if transforms else {}
+        self._excludes = excludes if excludes else []
+        self._disables = disables if disables else []
+        self._ignores = ignores if ignores else []
+        self._extras = extras if extras else {}
+        self._custom_check = custom_check
+
+    def __call__(self, attrs):
+        import warnings  # local import, only needed to report `disables`
+        # apply custom check
+        if self._custom_check:
+            func, msg = self._custom_check
+            if not func(attrs):
+                raise RuntimeError("Check failed: {}".format(msg))
+        # get new op_name
+        if isinstance(self._op_name, string_types):
+            op_name = self._op_name
+        else:
+            assert callable(self._op_name), "op_name can either be string or callable"
+            op_name = self._op_name(attrs)
+        # convert attributes
+        new_attrs = {}
+        for k in attrs.keys():
+            if k in self._excludes:
+                raise NotImplementedError("Attribute {} not supported yet.".format(k))
+            elif k in self._disables:
+                # documented contract: warn, and keep the attribute out of the result
+                warnings.warn("Attribute {} is disabled in mxnet.symbol.{}".format(k, op_name))
+            elif k in self._ignores:
+                pass
+            elif k in self._transforms:
+                new_name, defaults, transform = self._parse_default(self._transforms[k])
+                new_attr = self._required_attr(attrs, k) if defaults is None else attrs.get(k)
+                if new_attr is None:
+                    new_attrs[new_name] = defaults
+                else:
+                    new_attrs[new_name] = transform(new_attr)
+            else:
+                new_attrs[k] = attrs[k]  # copy
+        # add extras
+        new_attrs.update(self._extras)
+        return op_name, new_attrs
+
+    def _parse_default(self, target):
+        """Helper function to parse default values."""
+        if not isinstance(target, (list, tuple)):
+            k, v, t = target, None, lambda x: x
+        elif len(target) == 1:
+            k, v, t = target[0], None, lambda x: x
+        elif len(target) == 2:
+            k, v, t = target[0], target[1], lambda x: x
+        elif len(target) > 2:
+            k, v, t = target[0], target[1], target[2]
+        else:
+            k = None  # should raise
+        if not isinstance(k, string_types):
+            msg = "{} is not a valid target, (name, default) expected.".format(target)
+            raise ValueError(msg)
+        return k, v, t
+
+    def _parse_bool(self, value):
+        """Helper function to parse default boolean values."""
+        if isinstance(value, string_types):
+            return value.strip().lower() in ['true', '1', 't', 'y', 'yes']
+        return bool(value)
+
+    def _required_attr(self, attr, key):
+        """Wrapper for getting required attributes."""
+        assert isinstance(attr, dict)
+        if key not in attr:
+            raise AttributeError("Required attribute {} not found.".format(key))
+        return attr[key]
diff --git a/python/mxnet/contrib/onnx/_import/import_helper.py b/python/mxnet/contrib/onnx/_import/import_helper.py
new file mode 100644
index 000000000000..f67dcc3c05d6
--- /dev/null
+++ b/python/mxnet/contrib/onnx/_import/import_helper.py
@@ -0,0 +1,245 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=invalid-name
+"""Operator attributes conversion"""
+from .common import AttributeConverter as AttrCvt 
+
+def _revert_caffe2_pad(attr):
+    """Trim a 4-element Caffe2-style pad to its first two entries.
+
+    A 2-element pad is returned unchanged; any other length raises ValueError.
+    """
+    size = len(attr)
+    if size not in (2, 4):
+        raise ValueError("Invalid caffe2 type padding: {}".format(attr))
+    return attr[:2] if size == 4 else attr
+
+def _math_name_picker(surfix):
+    """Return a picker mapping an attr dict to 'broadcast_'/'elemwise_' + surfix."""
+    def _pick(attr):
+        prefix = 'broadcast_' if attr.get('broadcast', 0) else 'elemwise_'
+        return prefix + surfix
+    return _pick
+
+def _broadcast_constraint():
+    """Build a (check, message) pair that rejects attrs with a truthy 'axis'."""
+    def _no_axis(attrs):
+        # passes when 'axis' is absent or falsy (e.g. None or 0)
+        return not attrs.get('axis', None)
+    return _no_axis, "Specifying broadcast axis not allowed."
+
+def _dimension_constraint():
+    """Build a (check, message) pair that accepts only 2-d kernels.
+
+    Used as `custom_check` by the conv, deconv and pooling converters below."""
+    def _is_2d(attrs):
+        return len(attrs['kernel_shape']) == 2
+    return _is_2d, "Only 2d kernel supported."
+
+def _elemwise(name):
+    """Build the converter for a binary math operator (add, sub, mul, div).
+
+    The op name is chosen per node by `_math_name_picker(name)`."""
+    picker = _math_name_picker(name)
+    return AttrCvt(op_name=picker, disables=['axis'], ignores=['broadcast'])
+
+def _pooling(name):
+    """Build the converter for a 2-d pooling operator; `name` becomes `pool_type`."""
+    renames = {
+        'kernel_shape': 'kernel',
+        'strides': 'stride',
+        'pads': 'pad'}
+    # the 'valid' pooling convention is always set on the mxnet side
+    fixed = {'pool_type': name, 'pooling_convention':'valid'}
+    return AttrCvt(
+        op_name='Pooling', transforms=renames, extras=fixed,
+        custom_check=_dimension_constraint())
+
+def _conv():
+    """Build the converter from ONNX Conv attributes to MXNet `Convolution`."""
+    renames = {
+        'kernel_shape': 'kernel',
+        'strides': 'stride',
+        'dilations': ('dilate', (0, 0)),
+        # 4-element pads are cut down to two values by _revert_caffe2_pad
+        'pads': ('pad', (0, 0), _revert_caffe2_pad),
+        'group': ('num_group', 1)}
+    return AttrCvt(op_name='Convolution', transforms=renames,
+                   custom_check=_dimension_constraint())
+
+def _conv_transpose():
+    """Build the converter from ONNX ConvTranspose attributes to MXNet `Deconvolution`."""
+    renames = {
+        'kernel_shape': 'kernel',
+        'strides': 'stride',
+        'dilations': ('dilate', (0, 0)),
+        # 4-element pads are cut down to two values by _revert_caffe2_pad
+        'pads': ('pad', (0, 0), _revert_caffe2_pad),
+        'group': ('num_group', 1)}
+    return AttrCvt(op_name='Deconvolution', transforms=renames,
+                   disables=['output_shape'],
+                   custom_check=_dimension_constraint())
+
+def _batch_norm():
+    """Build the converter from ONNX batch-norm attributes to MXNet `BatchNorm`.
+
+    `epsilon` is renamed to `eps` and `cudnn_off=1` is always added."""
+    skipped = ['spatial', 'is_test', 'consumed_inputs']
+    return AttrCvt(op_name='BatchNorm', transforms={'epsilon': 'eps'},
+                   extras={'cudnn_off': 1}, ignores=skipped)
+
+def _activation(name):
+    """Build the converter for an activation served by MXNet `LeakyReLU`.
+
+    `name` is passed on as `act_type`; ONNX `alpha` is renamed to `slope`."""
+    renames = {'alpha':'slope'}
+    act_extras = {'act_type': name}
+    return AttrCvt(op_name='LeakyReLU', transforms=renames, extras=act_extras)
+
+def _pad_sequence_fix(attr, kernelDim=None):
+    """Reorder onnx's pads sequence into mxnet's pad_width order.
+    mxnet: (x1_begin, x1_end, ... , xn_begin, xn_end)
+    onnx: (x1_begin, x2_begin, ... , x1_end, x2_end, ...)"""
+    if len(attr) % 2 != 0:
+        return ()  # an odd number of pad values cannot be paired up
+    half = len(attr) // 2
+    paired = ()
+    for axis in range(half):
+        paired += attr[axis::half]  # (begin, end) of this axis
+    while kernelDim is not None and len(paired) < kernelDim*2:
+        paired += (0, 0)  # zero-pad the axes the pads did not cover
+    return paired
+
+def _pad():
+    """Build the converter from ONNX Pad attributes to MXNet `pad`.
+
+    `pads` is reordered by `_pad_sequence_fix` and stored as `pad_width`."""
+    pad_rule = ('pad_width', (0, 0, 0, 0, 0, 0, 0, 0), _pad_sequence_fix)
+    renames = {'pads': pad_rule, 'value': 'constant_value'}
+    return AttrCvt(op_name='pad', transforms=renames)
+
+def _global_pooling(name):
+    """Build the converter for a global pooling operator of type `name`.
+
+    The kernel attribute is required but is not present in onnx currently,
+    so a default (1, 1) kernel is supplied."""
+    fixed = {'global_pool': True, 'kernel': (1, 1), 'pool_type': name}
+    return AttrCvt(op_name='Pooling',
+                   extras=fixed)
+
+def _upsample_scale_fix(attr):
+    """Scale attribute conversion from float to int; int() discards the fractional part."""
+    return int(attr)
+
+def _upsample_restrict_mode(attr):
+    """Decode the onnx `mode` attribute; only 'nearest' is accepted because Mxnet's
+    UpSampling doesn't work well in bilinear mode (new operator is coming in
+    https://github.com/apache/incubator-mxnet/pull/9688/, tracked in onnx/onnx-mxnet issue 33)."""
+    mode = attr.decode()
+    if mode == 'nearest':
+        return mode
+    raise ValueError("Only nearest mode is supported: {}".format(attr))
+
+def _upsample(name):
+    """Build the converter from ONNX Upsample attributes to the MXNet op `name`.
+    Both height_scale and width_scale are written to the single `scale` attribute."""
+    scale_rule = ('scale', 1, _upsample_scale_fix)
+    mode_rule = ('sample_type', 'nearest', _upsample_restrict_mode)
+    return AttrCvt(op_name=name, transforms={
+        'height_scale': scale_rule, 'mode': mode_rule, 'width_scale': scale_rule})
+
+# _convert_map maps an ONNX operator type to its attribute converter functor (callable)
+_convert_map = {
+    # defs/experimental
+    'FC'            : AttrCvt('FullyConnected', ignores=['axis', 'axis_w']),
+
+    # defs/generator
+    'Constant': AttrCvt('identity'),
+    'RandomUniform' : AttrCvt('random_uniform', ignores=['seed']),
+    'RandomNormal'  : AttrCvt('random_normal', {'mean':'loc'}, ignores=['seed']),
+    'RandomUniformLike' : AttrCvt('random_uniform', ignores=['seed']),
+    'RandomNormalLike': AttrCvt('random_normal', {'mean':'loc'}, ignores=['seed']),
+
+    # defs/logical (no operators mapped)
+
+    # defs/math
+    'Add'           : _elemwise('add'),
+    'Sub'           : _elemwise('sub'),
+    'Mul'           : _elemwise('mul'),
+    'Div'           : _elemwise('div'),
+    'Neg'           : AttrCvt('negative'),
+    'Abs'           : AttrCvt('abs'),
+    'Reciprocal'    : AttrCvt('reciprocal'),
+    'Floor'         : AttrCvt('floor'),
+    'Ceil'          : AttrCvt('ceil'),
+    'Sqrt'          : AttrCvt('sqrt'),
+    'Gemm'          : AttrCvt('linalg_gemm', {'transA':'transpose_a', 'transB':'transpose_b'},
+                              ignores=['broadcast']),
+    'Relu'          : AttrCvt('relu'),
+    'LeakyRelu'     : AttrCvt('LeakyReLU', {'alpha': 'slope'}),
+    # 'Selu' (not mapped)
+    'Elu'           : _activation('elu'),
+    'Exp'           : AttrCvt('exp'),
+    'Log'           : AttrCvt('log'),
+    'Tanh'          : AttrCvt('tanh'),
+    'Pow'           : AttrCvt('pow', {'exponent':'exp'}),
+    'Dot'           : AttrCvt('dot'),
+    'MatMul'        : AttrCvt('linalg_gemm2'),
+    # 'PRelu' (not mapped)
+    'Sigmoid'       : AttrCvt('sigmoid'),
+    'Max'           : AttrCvt('maximum'), #elemwise maximum
+    'Min'           : AttrCvt('minimum'), #elemwise minimum
+    'Sum'           : AttrCvt('add_n'), #elemwise sum
+    # softmax default axis is different in onnx, so axis=1 is always passed
+    'Softmax'       : AttrCvt('softmax', extras={'axis': 1}),
+
+    # defs/nn
+    'AveragePool'   : _pooling('avg'),
+    'MaxPool'       : _pooling('max'),
+    'Conv'          : _conv(),
+    'ConvTranspose' : _conv_transpose(),
+    'GlobalAveragePool': _global_pooling('avg'),
+    'GlobalMaxPool' : _global_pooling('max'),
+    'BatchNormalization': _batch_norm(),
+    'SpatialBN'     : _batch_norm(),
+    'Dropout'       : AttrCvt('Dropout', {'ratio': 'p'}, ignores=['is_test']),
+    'Flatten'       : AttrCvt('flatten'),
+    'LRN'           : AttrCvt('LRN', {'bias': 'knorm', 'size' : 'nsize'}),
+    # defs/reduction
+    'ReduceMax'     : AttrCvt('max', {'axes': 'axis'}),
+    'ReduceMin'     : AttrCvt('min', {'axes': 'axis'}),
+    'ReduceSum'     : AttrCvt('sum', {'axes': 'axis'}),
+    'ReduceMean'    : AttrCvt('mean', {'axes': 'axis'}),
+    'ReduceProd'    : AttrCvt('prod', {'axes': 'axis'}),
+    # 'ReduceLogSumExp' (not mapped)
+    'ArgMax'        : AttrCvt('argmax'),
+    'ArgMin'        : AttrCvt('argmin'),
+
+    # defs/tensor
+    'Cast'          : AttrCvt('cast', {'to': 'dtype'}),
+    'Reshape'       : AttrCvt('reshape'),
+    'Concat'        : AttrCvt('concat', {'axis': 'dim'}),
+    'Split'         : AttrCvt('split', {'split': 'num_outputs'}),
+    'Pad'           : _pad(),
+    'Slice'         : AttrCvt('slice_axis', {'axes': 'axis', 'ends': 'end', 'starts': 'begin'}),
+    'Transpose'     : AttrCvt('transpose', {'perm': 'axes'}),
+    'Squeeze'       : AttrCvt('split', {'axes': 'axis'}),
+    # 'Gather' (not mapped)
+    'Upsample'      : _upsample('UpSampling')
+}
diff --git a/python/mxnet/contrib/onnx/_import/import_onnx.py b/python/mxnet/contrib/onnx/_import/import_onnx.py
new file mode 100644
index 000000000000..232ec6673d62
--- /dev/null
+++ b/python/mxnet/contrib/onnx/_import/import_onnx.py
@@ -0,0 +1,299 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=invalid-name,too-many-locals,no-self-use
+""" Support import export formats."""
+from __future__ import absolute_import as _abs
+from .... import symbol
+from .... import ndarray as nd
+from .import_helper import _convert_map, _pad_sequence_fix
+
+def _convert_operator(op_name, attrs, convert_map=None):
+    """Convert an onnx operator and its attributes to their mxnet counterparts.
+
+    Parameters
+    ----------
+    op_name : str
+        ONNX operator type, such as Conv, Gemm
+    attrs : dict
+        Dict of operator attributes
+    convert_map : dict, optional
+        Dict of name : callable, where name is the onnx op's name and the
+        callable takes attrs and returns (new_op_name, new_attrs).
+        Falls back to the module-level `_convert_map` when empty or None.
+
+    Returns
+    -------
+    (op, attrs)
+        The mxnet symbol function for the converted name, and the converted attrs.
+
+    Raises
+    ------
+    NotImplementedError if op_name has no converter; RuntimeError if the
+    converted name is not found in the `symbol` module.
+    """
+    table = convert_map or _convert_map
+    if op_name not in table:
+        raise NotImplementedError("Operator {} not implemented.".format(op_name))
+    mx_name, mx_attrs = table[op_name](attrs)
+    mx_op = getattr(symbol, mx_name, None)
+    if not mx_op:
+        raise RuntimeError("Unable to map op_name {} to sym".format(mx_name))
+    return mx_op, mx_attrs
+
+class GraphProto(object): # pylint: disable=too-few-public-methods
+    """A helper class for handling mxnet symbol copying from pb2.GraphProto.
+    Definition: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto
+    """
+    def __init__(self):
+        self._nodes = {}      # name -> mxnet symbol (variables and operator outputs)
+        self._params = {}     # name -> nd.array parameter value
+        self._renames = {}    # onnx graph-input name -> generated input_N / param_N name
+        self._num_input = 0   # counter used to number the input_N names
+        self._num_param = 0   # counter used to number the param_N names
+
+    def from_onnx(self, graph):
+        """Construct symbol from onnx graph.
+        The input names in an onnx graph are vague, only providing "1", "2"...
+        For convenience, we rename the `real` input names to "input_0",
+        "input_1"... and rename the parameters to "param_0", "param_1"...
+
+        Parameters
+        ----------
+        graph : onnx protobuf object
+            The loaded onnx graph
+
+        Returns
+        -------
+        sym : symbol.Symbol
+            The converted mxnet symbol (a symbol.Group when the graph has several outputs)
+        params : dict
+            A dict of name: nd.array pairs, used as pretrained weights
+        """
+        # parse the initializers, i.e. the tensors that hold parameter values
+        for init_tensor in graph.initializer:
+            if not init_tensor.name.strip():
+                raise ValueError("Tensor's name is required.")
+            self._params[init_tensor.name] = self._parse_array(init_tensor)
+
+        # graph inputs: those with an initializer become param_N, the others input_N
+        for i in graph.input:
+            if i.name in self._params:
+                # i is a param instead of input
+                name_param = 'param_{}'.format(self._num_param)
+                self._num_param += 1
+                self._params[name_param] = self._params.pop(i.name)
+                self._nodes[name_param] = symbol.Variable(name=name_param,
+                                                          shape=self._params[name_param].shape)
+                self._renames[i.name] = name_param
+            else:
+                name_input = 'input_{}'.format(self._num_input)
+                self._num_input += 1
+                self._nodes[name_input] = symbol.Variable(name=name_input)
+                self._renames[i.name] = name_input
+
+        # constructing nodes, nodes are stored as directed acyclic graph
+        # converting NodeProto message
+        for node in graph.node:
+            op_name = node.op_type
+            node_name = node.name.strip()
+            node_name = node_name if node_name else None
+            onnx_attr = self._parse_attr(node.attribute)
+            new_op, mx_attr = _convert_operator(op_name, onnx_attr)
+            inputs = [self._nodes[self._renames.get(i, i)] for i in node.input]
+
+            # some workarounds for inconsistencies in onnx and mxnet conventions.
+            mx_attr = self._fix_bias(new_op, mx_attr, len(inputs))
+            mx_attr = self._fix_channels(new_op, mx_attr, list(node.input))
+            self._fix_bias_shape(node.op_type, node.input, onnx_attr)
+
+            # look the inputs up again: _fix_bias_shape may have replaced a variable in self._nodes
+            inputs = [self._nodes[self._renames.get(i, i)] for i in node.input]
+
+            # onnx's Gemm operator also supports broadcasting C input which
+            # mxnet's equivalent linalg_gemm doesn't. So using combination of
+            # transpose and FullyConnected operators.
+            if op_name == 'Gemm':
+                new_op, inputs, mx_attr = self._fix_gemm('FullyConnected', inputs, onnx_attr)
+
+            # onnx slice works on multiple axes whereas mxnet's slice_axis is for single axis
+            if op_name == 'Slice':
+                op = self._fix_slice(inputs, mx_attr)
+            elif op_name == 'AveragePool' and onnx_attr.get('pads') is not None or \
+                                    op_name == 'MaxPool' and onnx_attr.get('pads') is not None:
+                op = self._fix_pooling(op_name, inputs, onnx_attr)
+            elif op_name == 'Squeeze':
+                op = self._fix_squeeze(inputs, mx_attr)
+            else:
+                op = new_op(name=node_name, *inputs, **mx_attr)
+
+            node_output = self._fix_outputs(op_name, node.output)
+
+            assert len(node_output) == len(op.list_outputs()), (
+                "Number of output mismatch {} vs {} in {}.".format(
+                    len(node_output), len(op.list_outputs()), op_name))
+            for k, i in zip(list(node_output), range(len(node_output))):
+                self._nodes[k] = op[i]
+        # now return the outputs
+        out = [self._nodes[i.name] for i in graph.output]
+        if len(out) > 1:
+            out = symbol.Group(out)
+        else:
+            out = out[0]
+        return out, self._params
+
+    def _fix_pooling(self, op_name, inputs, new_attr):
+        """Emulate onnx pooling with explicit pads as a `pad` followed by `Pooling`.
+
+        onnx allows asymmetrical padding, so a constant-mode pad operator is put
+        in front of the pooling. `new_attr` uses the onnx names (strides, ...)."""
+        kernel = new_attr.get('kernel_shape')
+        # the two leading (0, 0) pairs leave the first two axes unpadded
+        widths = (0, 0, 0, 0) + _pad_sequence_fix(new_attr.get('pads'), len(kernel))
+        padded = symbol.pad(inputs[0], mode='constant', pad_width=widths)
+        kind = 'avg' if op_name == 'AveragePool' else 'max'
+        return symbol.Pooling(padded, pool_type=kind,
+                              stride=new_attr.get('strides'), kernel=kernel)
+
+    def _fix_slice(self, inputs, new_attr):
+        """Build onnx's multi-axis Slice as a chain of mxnet slice_axis operators,
+        one per axis. Without an 'axis' attribute the leading len(begin) axes are used."""
+        begin = new_attr.get('begin')
+        end = new_attr.get('end')
+        axes = new_attr.get('axis', tuple(range(len(begin))))
+        slice_op = symbol.slice_axis(inputs[0], axis=axes[0], begin=begin[0], end=end[0])
+        # axes[0] is done above; slicing it again would shift the window when begin[0] != 0
+        for i in range(1, len(axes)):
+            slice_op = symbol.slice_axis(slice_op, axis=axes[i], begin=begin[i], end=end[i])
+        return slice_op
+
+    def _fix_squeeze(self, inputs, new_attr):
+        """
+        MXNet doesn't have a squeeze operator, so every axis is removed with a
+        single-output "split" (squeeze_axis=1), which can be slower than "reshape".
+        Assumes non-negative axes in ascending order.
+        TODO: Remove this implementation once mxnet adds the support.
+        """
+        axes = new_attr.get('axis')
+        op = symbol.split(inputs[0], axis=axes[0], num_outputs=1, squeeze_axis=1)
+        for removed, i in enumerate(axes[1:], 1):
+            # `removed` axes in front of i are already gone, so i moved left by that many
+            op = symbol.split(op, axis=i-removed, num_outputs=1, squeeze_axis=1)
+        return op
+
+    def _fix_gemm(self, op_name, inputs, old_attr):
+        """Express onnx Gemm through the mxnet operator `op_name` (FullyConnected at
+        the call site) instead of linalg_gemm. Returns (op, new_inputs, new_attr);
+        num_hidden is taken from the first dimension of the third input's parameter."""
+        op = getattr(symbol, op_name, None)
+        alpha = float(old_attr.get('alpha', 1.0))
+        beta = float(old_attr.get('beta', 1.0))
+        if int(old_attr.get('transA', 0)):
+            inputs[0] = symbol.transpose(inputs[0], axes=(1, 0))
+        if not int(old_attr.get('transB', 0)):  # transpose B unless onnx marks it transposed
+            inputs[1] = symbol.transpose(inputs[1], axes=(1, 0))
+        new_inputs = [alpha*inputs[0], inputs[1], beta*inputs[2]]
+        new_attr = {'num_hidden' : self._params[inputs[2].name].shape[0]}
+        return op, new_inputs, new_attr
+
+    def _parse_array(self, tensor_proto):
+        """Convert an onnx TensorProto into an mxnet nd.array shaped by its `dims`."""
+        try:
+            from onnx.numpy_helper import to_array as onnx_to_array
+        except ImportError as e:
+            raise ImportError("Unable to import onnx which is required {}".format(e))
+        shape = tuple(tensor_proto.dims)
+        return nd.array(onnx_to_array(tensor_proto).reshape(shape))
+
+    def _parse_attr(self, attr_proto):
+        """Convert a list of AttributeProto to a dict, with names as keys."""
+        attrs = {}
+        for a in attr_proto:
+            for f in ['f', 'i', 's']:  # scalar fields are stored as they are
+                if a.HasField(f):
+                    attrs[a.name] = getattr(a, f)
+            for f in ['floats', 'ints', 'strings']:  # non-empty repeated fields become tuples
+                if list(getattr(a, f)):
+                    assert a.name not in attrs, "Only one type of attr is allowed"
+                    attrs[a.name] = tuple(getattr(a, f))
+            for f in ['t', 'g']:  # tensor / graph values are kept as protobuf objects
+                if a.HasField(f):
+                    attrs[a.name] = getattr(a, f)
+            for f in ['tensors', 'graphs']:  # lists of tensors / graphs are rejected
+                if list(getattr(a, f)):
+                    raise NotImplementedError("Field {} is not supported in mxnet.".format(f))
+            if a.name not in attrs:
+                raise ValueError("Cannot parse attribute: \n{}\n.".format(a))
+        return attrs
+
+    def _fix_outputs(self, op, outputs):
+        """Trim the onnx output list for operators that have more than one out in ONNX.
+
+        Only Dropout is affected: its last onnx output is dropped."""
+        if op != 'Dropout':
+            return outputs
+        assert len(outputs) == 2, "ONNX have two outputs for dropout layer."
+        return outputs[:-1]
+
+    def _fix_bias(self, op, attrs, num_inputs):
+        """Derive mxnet's `no_bias` from the number of inputs, since onnx has no such
+        attribute: 3 inputs set no_bias=False, 2 inputs set no_bias=True.
+
+        Only Convolution, Deconvolution and FullyConnected are touched; `attrs` is
+        updated in place and returned."""
+        biased_ops = [symbol.Convolution, symbol.Deconvolution, symbol.FullyConnected]
+        if op in biased_ops:
+            if num_inputs not in (2, 3):
+                raise ValueError("Unexpected number of inputs for: {}".format(op))
+            attrs['no_bias'] = (num_inputs == 2)
+        return attrs
+
+
+    def _fix_bias_shape(self, op_name, inputs, attrs):
+        """Reshape the 1-d second input of a broadcast Add/Mul to (1, n, 1, 1)."""
+        is_broadcast = 'broadcast' in attrs and attrs['broadcast'] == 1
+        if op_name not in ('Add', 'Mul') or not self._params or not is_broadcast:
+            return
+        assert len(list(inputs)) == 2
+        bias_name = self._renames.get(inputs[1], inputs[1])
+        bias = self._params[bias_name]
+        assert len(bias.shape) == 1
+        bias = nd.array(bias.asnumpy().reshape((1, -1, 1, 1)))
+        # broadcast_add expects shape with sym.variable
+        self._nodes[bias_name] = symbol.Variable(name=bias_name, shape=bias.shape)
+        self._params[bias_name] = bias
+
+
+    def _fix_channels(self, op, attrs, inputs):
+        """A workaround for getting 'channels' or 'units' since onnx doesn't provide
+        these attributes. We check the shape of weights provided to get the number."""
+        if op not in [symbol.Convolution, symbol.Deconvolution, symbol.FullyConnected]:
+            return attrs
+        weight_name = self._renames.get(inputs[1], inputs[1])  # unknown names reach the ValueError
+        if weight_name not in self._params:
+            raise ValueError("Unable to get channels/units attr from onnx graph.")
+        wshape = self._params[weight_name].shape
+        assert len(wshape) >= 2, "Weights shape is invalid: {}".format(wshape)
+        if op in [symbol.FullyConnected]:
+            attrs['num_hidden'] = wshape[0]
+        elif op in [symbol.Deconvolution]:
+            # deconvolution weights are laid out (in_channels, num_filter/num_group, ...)
+            attrs['num_filter'] = wshape[1] * attrs.get('num_group', 1)
+        else:
+            attrs['num_filter'] = wshape[0]
+        return attrs
diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py
index 0a02b80a1c06..ddffc01bd238 100644
--- a/python/mxnet/metric.py
+++ b/python/mxnet/metric.py
@@ -510,28 +510,27 @@ def update_binary_stats(self, label, pred):
         if len(numpy.unique(label)) > 2:
             raise ValueError("%s currently only supports binary classification."
                              % self.__class__.__name__)
+        pred_true = (pred_label == 1)
+        pred_false = 1 - pred_true
+        label_true = (label == 1)
+        label_false = 1 - label_true
 
-        for y_pred, y_true in zip(pred_label, label):
-            if y_pred == 1 and y_true == 1:
-                self.true_positives += 1.
-            elif y_pred == 1 and y_true == 0:
-                self.false_positives += 1.
-            elif y_pred == 0 and y_true == 1:
-                self.false_negatives += 1.
-            else:
-                self.true_negatives += 1.
+        self.true_positives += (pred_true * label_true).sum()
+        self.false_positives += (pred_true * label_false).sum()
+        self.false_negatives += (pred_false * label_true).sum()
+        self.true_negatives += (pred_false * label_false).sum()
 
     @property
     def precision(self):
         if self.true_positives + self.false_positives > 0:
-            return self.true_positives / (self.true_positives + self.false_positives)
+            return float(self.true_positives) / (self.true_positives + self.false_positives)
         else:
             return 0.
 
     @property
     def recall(self):
         if self.true_positives + self.false_negatives > 0:
-            return self.true_positives / (self.true_positives + self.false_negatives)
+            return float(self.true_positives) / (self.true_positives + self.false_negatives)
         else:
             return 0.
 
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 59389ddb220d..5ac279635a1d 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -174,8 +174,15 @@ class NDArray(NDArrayBase):
     __slots__ = []
     # make numpy functions return NDArray instead of numpy object array
     __array_priority__ = 1000.0
+    # Extension type code for TVM function.
+    # See C++ side of definition(kTVMNDArrayTypeCode) at include/mxnet/tensor_blob.h
+    _tvm_tcode = 19
     # pylint: disable= no-member, undefined-variable
 
+    @property
+    def _tvm_handle(self):
+        return self.handle.value  # NOTE(review): presumably the raw handle address read by TVM - confirm
+
     def __repr__(self):
         """Returns a string representation of the array."""
         shape_info = 'x'.join(['%d' % x for x in self.shape])
diff --git a/python/setup.py b/python/setup.py
index cf94adf982d5..1ef14d95a0d1 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -28,7 +28,7 @@
 else:
     from setuptools import setup
     from setuptools.extension import Extension
-    kwargs = {'install_requires': ['numpy<=1.13.3,>=1.8.2', 'requests==2.18.4', 'graphviz==0.8.1'], 'zip_safe': False}
+    kwargs = {'install_requires': ['numpy<=1.13.3,>=1.8.2', 'requests==2.18.4', 'graphviz==0.8.1', 'onnx>=1.0.1'], 'zip_safe': False}
 from setuptools import find_packages
 
 with_cython = False
diff --git a/src/nnvm/tvm_bridge.cc b/src/nnvm/tvm_bridge.cc
new file mode 100644
index 000000000000..06929984640d
--- /dev/null
+++ b/src/nnvm/tvm_bridge.cc
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm_bridge.cc
+ * \brief Bridge to run TVM's PackedFunc in MXNet's async engine.
+ *
+ *  This bridge is mainly used to expose MXNet's async engine push to
+ *  TVM. It only uses the TVM runtime in header-only mode, which means
+ *  there are no link dependencies.
+ *
+ *  Support for TVM is optional even when this code
+ *  is always compiled and built with the project.
+ *  We choose this strategy because we do not yet want
+ *  llvm as dependency(which TVM uses). So instead we expose hook
+ *  to TVM and let user use this feature when they have TVM installed.
+ *
+ *  We do require TVM and MXNet to be built with same C++ ABI of std::function
+ */
+#define TVM_RUNTIME_HEADER_ONLY 1
+#include <tvm/runtime/packed_func.h>
+#include <mxnet/c_api.h>
+#include <mxnet/ndarray.h>
+#include <mxnet/engine.h>
+
+#include <memory>
+
+namespace mxnet {
+
+using tvm::runtime::PackedFunc;
+using tvm::runtime::TVMArgs;
+using tvm::runtime::TVMRetValue;
+
+/*!
+ * \brief Functor that runs a TVM PackedFunc from inside MXNet's engine.
+ *  Call arguments are snapshotted by Init(); NDArrays are bound to DLTensors in Run().
+ */
+class TVMFunctor {
+ public:
+  /*! \brief Store the function to run and the hook used to set its stream. */
+  explicit TVMFunctor(PackedFunc func, PackedFunc fset_stream)
+      : func_(func), fset_stream_(fset_stream) {}
+
+  /*! \brief Snapshot args; NDArrays at the (ascending) const_loc positions go to
+   *  const_vars, every other NDArray goes to mutate_vars. */
+  void Init(const TVMArgs& args,
+            const std::vector<int>& const_loc,
+            std::vector<Engine::VarHandle>* const_vars,
+            std::vector<Engine::VarHandle>* mutate_vars) {
+    values_.assign(args.values, args.values + args.size());
+    type_codes_.assign(args.type_codes, args.type_codes + args.size());
+    array_data_.clear();  // forget arrays from any earlier Init() so that
+    array_loc_.clear();   // Run() never rebinds stale argument slots
+
+    size_t const_loc_ptr = 0;
+    for (int i = 0; i < args.size(); ++i) {
+      if (args.type_codes[i] == kTVMNDArrayTypeCode) {
+        const NDArray& nd = static_cast<NDArray*>(args.values[i].v_handle)[0];
+        // The DLTensor pointer is only filled in by Run(); fix the type code now.
+        type_codes_[i] = kArrayHandle;
+        array_data_.push_back(nd);
+        array_loc_.push_back(i);
+        // Arrays are assumed to be mutated unless their position is in const_loc.
+        if (const_loc_ptr < const_loc.size() && i == const_loc[const_loc_ptr]) {
+          const_vars->push_back(nd.var());
+          ++const_loc_ptr;
+        } else {
+          mutate_vars->push_back(nd.var());
+        }
+      } else {
+        CHECK_LT(args.type_codes[i], kTVMType)
+            << "Only allow POD type in mxnet async call";
+      }
+    }
+  }
+
+  /*! \brief Context of the first NDArray argument (at least one is required). */
+  Context ctx() {
+    CHECK(!array_data_.empty()) << "mxnet async call needs at least one NDArray argument";
+    return array_data_[0].ctx();
+  }
+
+  /*! \brief Bind NDArrays to DLTensors, then invoke func_ (on rctx's stream for GPU). */
+  void Run(const RunContext& rctx) {
+    // Point each recorded NDArray slot at the array's DLTensor view.
+    for (size_t i = 0; i < array_loc_.size(); ++i) {
+      values_[array_loc_[i]].v_handle =
+          const_cast<DLTensor*>(&(array_data_[i].data().dltensor()));
+    }
+    TVMRetValue rv;
+    TVMArgs args(values_.data(), type_codes_.data(), values_.size());
+    if (ctx().dev_type == Context::kGPU) {
+#if MXNET_USE_CUDA
+      // Hand the engine's stream to TVM for this call, then reset it to null.
+      void* strm = static_cast<void*>(rctx.get_stream<gpu>()->stream_);
+      int dev_type = kDLGPU;
+      fset_stream_(dev_type, rctx.ctx.dev_id, strm);
+      func_.CallPacked(args, &rv);
+      fset_stream_(dev_type, rctx.ctx.dev_id, nullptr);
+#else
+      LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
+#endif
+    } else {
+      func_.CallPacked(args, &rv);
+    }
+  }
+
+ private:
+  /*! \brief The wrapped function that Run() invokes */
+  PackedFunc func_;
+  /*! \brief Hook called as (dev_type, dev_id, stream) around GPU calls */
+  PackedFunc fset_stream_;
+  /*! \brief Copy of the argument values passed to Init() */
+  std::vector<TVMValue> values_;
+  /*! \brief Copy of the argument type codes (NDArray codes become kArrayHandle) */
+  std::vector<int> type_codes_;
+  /*! \brief NDArray arguments, in argument order */
+  std::vector<NDArray> array_data_;
+  /*! \brief position of array in arguments */
+  std::vector<int> array_loc_;
+};
+
+
+// Turn a TVM PackedFunc into one that is scheduled through MXNet's engine.
+// wrap_args: [0] function to run, [1] stream setter, [2] number of constant
+// positions, [3...] the constant argument positions themselves.
+void WrapAsyncCall(TVMArgs wrap_args, TVMRetValue* wrap_rv) {
+  PackedFunc f = wrap_args[0];
+  PackedFunc fset_stream = wrap_args[1];
+  const int num_const = wrap_args[2];
+
+  // Collect the constant-argument positions in ascending order.
+  std::vector<int> const_loc;
+  for (int k = 0; k < num_const; ++k) {
+    const_loc.push_back(wrap_args[k + 3].operator int());
+  }
+  std::sort(const_loc.begin(), const_loc.end());
+
+  // This closure is what the user actually calls: it records the arguments,
+  // then pushes the real invocation onto the engine.
+  auto wrapped = [f, fset_stream, const_loc](TVMArgs args, TVMRetValue* rv) {
+    auto functor = std::make_shared<TVMFunctor>(f, fset_stream);
+    std::vector<Engine::VarHandle> const_vars;
+    std::vector<Engine::VarHandle> mutate_vars;
+    functor->Init(args, const_loc, &const_vars, &mutate_vars);
+    Engine* engine = Engine::Get();
+    engine->DeduplicateVarHandle(&const_vars, &mutate_vars);
+    auto run = [functor](RunContext rctx) { functor->Run(rctx); };
+    engine->PushSync(run, functor->ctx(), const_vars, mutate_vars);
+  };
+  *wrap_rv = PackedFunc(wrapped);
+}
+
+}  // namespace mxnet
+
+// C entry point for TVM: pregister must point to a PackedFunc, which is invoked
+// with the name "WrapAsyncCall" and a PackedFunc wrapping mxnet::WrapAsyncCall.
+extern "C" MXNET_DLL int MXTVMBridge(TVMFunctionHandle pregister) {
+  using tvm::runtime::PackedFunc;
+  // The handle is an opaque pointer to the caller's registration function.
+  const PackedFunc* fregister = static_cast<const PackedFunc*>(pregister);
+  (*fregister)("WrapAsyncCall", PackedFunc(mxnet::WrapAsyncCall));
+  return 0;
+}
diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc
index 7b0fbae3bccb..0b8ce69062bd 100644
--- a/src/operator/regression_output.cc
+++ b/src/operator/regression_output.cc
@@ -23,6 +23,8 @@
 */
 
 #include "./regression_output-inl.h"
+#include "./elemwise_op_common.h"
+
 
 #define MXNET_OPERATOR_REGISTER_REGRESSION_FWD(__name$, __kernel$, __bwdop$)   \
   NNVM_REGISTER_OP(__name$)                                                    \
@@ -33,6 +35,7 @@
       return std::vector<std::string>{"data", "label"};                        \
     })                                                                         \
   .set_attr<nnvm::FInferShape>("FInferShape", RegressionOpShape)               \
+  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)                \
   .set_attr<nnvm::FGradient>("FGradient", RegressionOpGrad{__bwdop$})          \
   .set_attr<nnvm::FInplaceOption>("FInplaceOption",                            \
   [](const NodeAttrs& attrs){                                                  \
@@ -48,6 +51,7 @@
   .set_num_inputs(2)                                                       \
   .set_num_outputs(2)                                                      \
   .set_attr_parser(ParamParser<RegressionOutputParam>)                     \
+  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 2>)            \
   .set_attr<nnvm::TIsBackward>("TIsBackward", true)                        \
   .set_attr<nnvm::FInplaceOption>("FInplaceOption",                        \
   [](const NodeAttrs& attrs){                                              \
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index 2483e62b99b5..bd1a00839167 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -12,3 +12,9 @@ COPY install/ubuntu_install_r.sh /install/
 RUN /install/ubuntu_install_r.sh
 COPY install/ubuntu_install_perl.sh /install/
 RUN /install/ubuntu_install_perl.sh
+
+COPY install/ubuntu_install_llvm.sh /install/
+RUN /install/ubuntu_install_llvm.sh
+
+COPY install/ubuntu_install_tvm.sh /install/
+RUN /install/ubuntu_install_tvm.sh
diff --git a/tests/ci_build/install/ubuntu_install_llvm.sh b/tests/ci_build/install/ubuntu_install_llvm.sh
new file mode 100755
index 000000000000..d3282e7a5fce
--- /dev/null
+++ b/tests/ci_build/install/ubuntu_install_llvm.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+
+echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main" \
+     >> /etc/apt/sources.list.d/llvm.list
+echo "deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main" \
+     >> /etc/apt/sources.list.d/llvm.list
+# The script already needs root (it writes under /etc/apt), so call apt-key without sudo.
+wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
+apt-get update && apt-get install -y --force-yes llvm-5.0
diff --git a/tests/ci_build/install/ubuntu_install_tvm.sh b/tests/ci_build/install/ubuntu_install_tvm.sh
new file mode 100755
index 000000000000..2729c7fe3bee
--- /dev/null
+++ b/tests/ci_build/install/ubuntu_install_tvm.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build and install TVM
+cd /tmp
+git clone https://github.com/dmlc/tvm/ --recursive
+cd tvm
+
+# Pin a commit known to support the MXNet TVM bridge: it was only just merged
+# into TVM master and there is not yet a version tag that contains it.
+git checkout 30eaf463e34d7c301357c31a010945d11df16537
+
+# Copy the default config into the repo root, then append our overrides to the copy.
+cp make/config.mk .
+echo USE_CUDA=1 >> config.mk
+echo LLVM_CONFIG=llvm-config-5.0 >> config.mk
+echo USE_RPC=1 >> config.mk
+echo USE_GRAPH_RUNTIME=1 >> config.mk
+echo CUDA_PATH=/usr/local/cuda >> config.mk
+make -j`nproc`
+
+cd python
+python setup.py install
+cd -
+
+cd topi/python
+python setup.py install
+cd -
diff --git a/tests/python/gpu/test_tvm_bridge.py b/tests/python/gpu/test_tvm_bridge.py
new file mode 100644
index 000000000000..292b9d91e5f7
--- /dev/null
+++ b/tests/python/gpu/test_tvm_bridge.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test TVM bridge, only enable this when TVM is available"""
+import logging
+import mxnet as mx
+import numpy as np
+
+def test_tvm_bridge():
+    """Run a TVM-built kernel through the MXNet engine on CPU and GPU and check the result."""
+    # only enable test if TVM is available
+    try:
+        import tvm
+        import tvm.contrib.mxnet
+        import topi
+    except ImportError:
+        logging.warning("TVM bridge test skipped because TVM is missing...")
+        return
+
+    def check(target):
+        shape = (20,)
+        scale = tvm.var("scale", dtype="float32")
+        x = tvm.placeholder(shape)
+        y = tvm.placeholder(shape)
+        z = tvm.compute(shape, lambda i: x[i] + y[i])
+        zz = tvm.compute(shape, lambda *i: z(*i) * scale)
+        ctx = mx.gpu(0) if target == "cuda" else mx.cpu(0)
+        target = tvm.target.create(target)
+
+        # build the function
+        with target:
+            s = topi.generic.schedule_injective(zz)
+            f = tvm.build(s, [x, y, zz, scale])
+
+        # get a mxnet version; arguments 0 and 1 (x, y) are read-only
+        mxf = tvm.contrib.mxnet.to_mxnet_func(f, const_loc=[0, 1])
+        xx = mx.nd.uniform(shape=shape, ctx=ctx)
+        yy = mx.nd.uniform(shape=shape, ctx=ctx)
+        zz = mx.nd.empty(shape=shape, ctx=ctx)
+        # invoke mxf: this runs in mxnet engine
+        mxf(xx, yy, zz, 10.0)
+        np.testing.assert_allclose(zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)
+
+    check("llvm")
+    check("cuda")
+
+
+
+if __name__ == "__main__":
+    test_tvm_bridge()
diff --git a/tests/python/onnx_test_utils/backend.py b/tests/python/onnx_test_utils/backend.py
new file mode 100644
index 000000000000..33f266598284
--- /dev/null
+++ b/tests/python/onnx_test_utils/backend.py
@@ -0,0 +1,183 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=too-many-locals,invalid-name
+"""backend wrapper for onnx test infrastructure"""
+from collections import namedtuple
+import mxnet as mx
+from onnx import helper, TensorProto
+from onnx.backend.base import Backend
+from mxnet.contrib.onnx._import.import_onnx import GraphProto
+from .backend_rep import MXNetBackendRep
+
+# Using these functions for onnx test infrastructure.
+# Implemented by following onnx docs guide:
+# https://github.com/onnx/onnx/blob/master/docs/Implementing%20an%20ONNX%20backend.md
+# MXNetBackend class will take an ONNX model with inputs, perform a computation,
+# and then return the output.
+
+class MXNetBackend(Backend):
+    """MXNet backend for ONNX"""
+
+    @staticmethod
+    def make_graph(node, inputs):
+        """Create an ONNX GraphProto that contains only `node`."""
+        initializer = []
+        tensor_input_info = []
+        tensor_output_info = []
+
+        # Adding input tensor info.
+        for index, input_name in enumerate(node.input):
+            tensor_input_info.append(
+                helper.make_tensor_value_info(str(input_name), TensorProto.FLOAT, [1]))
+
+            # Creating an initializer for Weight params.
+            # Assumes that weight params is named as 'W'.
+            # TODO: Handle multiple weight params.
+            # TODO: Add for "bias" if needed
+            if input_name == 'W':
+                dim = inputs[index].shape
+                param_tensor = helper.make_tensor(
+                    name=input_name,
+                    data_type=TensorProto.FLOAT,
+                    dims=dim,
+                    vals=inputs[index].flatten())
+
+                initializer.append(param_tensor)
+
+        # Adding output tensor info.
+        for output_name in node.output:
+            tensor_output_info.append(
+                helper.make_tensor_value_info(str(output_name), TensorProto.FLOAT, [1]))
+
+        # creating graph proto object.
+        graph_proto = helper.make_graph(
+            [node],
+            "test",
+            tensor_input_info,
+            tensor_output_info,
+            initializer=initializer)
+
+        return graph_proto
+
+    @classmethod
+    def run_node(cls, node, inputs, device='CPU'):
+        """Run a single ONNX node on the MXNet engine and return the
+        result to the ONNX test infrastructure.
+
+        Parameters
+        ----------
+        node   : onnx node object
+            loaded onnx node (individual layer)
+        inputs : list of numpy arrays
+            inputs to run the node on
+        device : 'CPU'
+            device to run a node on (anything else raises NotImplementedError)
+
+        Returns
+        -------
+        result : numpy array, or a one-element list holding it
+            first output; wrapped in a list for the op types in dim_change_op_types
+        """
+        graph = GraphProto()
+        sym, params = graph.from_onnx(MXNetBackend.make_graph(node, inputs))
+        data_names = [i for i in sym.get_internals().list_inputs() if i[:-1] == "input_"]
+        data_shapes = []
+        dim_change_op_types = {'ReduceMin', 'ReduceMax', 'ReduceMean',
+                               'ReduceProd', 'ReduceSum', 'Slice', 'Pad',
+                               'Squeeze', 'Upsample', 'Reshape', 'Conv'}
+
+        # Adding extra dimension of batch_size 1 if the batch_size is different for multiple inputs.
+        for idx, input_name in enumerate(data_names):
+            batch_size = 1
+            if len(inputs[idx].shape) < 4 and len(inputs) > 1 and \
+                            len(set(x.shape[0] for x in inputs)) != 1:
+                tuples = ((batch_size,), inputs[idx].shape)
+                new_shape = sum(tuples, ())
+                data_shapes.append((input_name, new_shape))
+            else:
+                data_shapes.append((input_name, inputs[idx].shape))
+
+        # create module, passing cpu context
+        if device == 'CPU':
+            ctx = mx.cpu()
+        else:
+            raise NotImplementedError("Only CPU context is supported for now")
+
+        # create a module
+        mod = mx.mod.Module(symbol=sym, data_names=data_names, context=ctx, label_names=None)
+        mod.bind(for_training=False, data_shapes=data_shapes, label_shapes=None)
+
+        # initializing parameters for calculating result of each individual node
+        if len(params) > 0:
+            mod.set_params(arg_params=params, aux_params=params)
+        else:
+            mod.init_params()
+
+        batch = namedtuple('Batch', ['data'])
+
+        data_forward = []
+        for idx in range(len(data_names)):
+            # slice and pad operator tests needs 1 less dimension in forward pass
+            # otherwise it will throw an error.
+            # for squeeze operator, need to retain shape of input as provided
+            val = inputs[idx]
+            if node.op_type in dim_change_op_types:
+                data_forward.append(mx.nd.array(val))
+            else:
+                data_forward.append(mx.nd.array([val]))
+
+        mod.forward(batch(data_forward))
+        result = mod.get_outputs()[0].asnumpy()
+        if node.op_type in dim_change_op_types:
+            return [result]
+        return result
+
+    @classmethod
+    def prepare(cls, model, device='CPU', **kwargs):
+        """For running end to end model(used for onnx test backend)
+
+        Parameters
+        ----------
+        model  : onnx ModelProto object
+            loaded onnx graph
+        device : 'CPU'
+            specifying device to run test on
+        kwargs :
+            other arguments
+
+        Returns
+        -------
+        MXNetBackendRep : object
+            Returns object of MXNetBackendRep class which will be in turn
+            used to run inference on the input model and return the result for comparison.
+        """
+        graph = GraphProto()
+        sym, params = graph.from_onnx(model.graph)
+        return MXNetBackendRep(sym, params, device)
+
+    @classmethod
+    def supports_device(cls, device):
+        """Supports only CPU for testing"""
+        return device == 'CPU'
+
+prepare = MXNetBackend.prepare
+
+run_node = MXNetBackend.run_node
+
+supports_device = MXNetBackend.supports_device
diff --git a/tests/python/onnx_test_utils/backend_rep.py b/tests/python/onnx_test_utils/backend_rep.py
new file mode 100644
index 000000000000..690054238667
--- /dev/null
+++ b/tests/python/onnx_test_utils/backend_rep.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=too-few-public-methods
+"""backend rep for onnx test infrastructure"""
+from collections import namedtuple
+import numpy as np
+from onnx.backend.base import BackendRep
+import mxnet as mx
+
+# Using these functions for onnx test infrastructure.
+# Implemented by following onnx docs guide:
+# https://github.com/onnx/onnx/blob/master/docs/Implementing%20an%20ONNX%20backend.md
+# MXNetBackendRep object will be returned by MXNetBackend's prepare method which is used to
+# execute a model repeatedly.
+# Inputs will be passed to the run method of MXNetBackendRep class, it will perform computation and
+# retrieve the corresponding results for comparison to the onnx backend.
+# https://github.com/onnx/onnx/blob/master/onnx/backend/test/runner/__init__.py.
+
+class MXNetBackendRep(BackendRep):
+    """Holds a converted model (symbol, params, device) and runs inference with it,
+    returning the result to the onnx test infrastructure for comparison."""
+    def __init__(self, symbol, params, device):
+        self.symbol = symbol
+        self.params = params
+        self.device = device
+
+    def run(self, inputs, **kwargs):
+        """Run one forward pass of the stored model on the first input.
+
+        Parameters
+        ----------
+        inputs : sequence
+            only ``inputs[0]`` is used; it is converted to a float32 numpy array
+
+        Returns
+        -------
+        list of numpy array
+            one-element list holding the first output of the model
+        """
+        data = np.asarray(inputs[0], dtype='f')
+
+        # only the CPU context is implemented; reject every other device
+        if self.device != 'CPU':
+            raise NotImplementedError("Only CPU context is supported for now")
+        context = mx.cpu()
+
+        # bind a module to the single data input and load the model parameters
+        module = mx.mod.Module(symbol=self.symbol, data_names=['input_0'],
+                               context=context, label_names=None)
+        module.bind(for_training=False, label_shapes=None,
+                    data_shapes=[('input_0', data.shape)])
+        module.set_params(arg_params=self.params, aux_params=None)
+
+        # run inference
+        batch_cls = namedtuple('Batch', ['data'])
+        module.forward(batch_cls([mx.nd.array(data)]))
+
+        first_output = module.get_outputs()[0]
+        return [first_output.asnumpy()]
diff --git a/tests/python/unittest/onnx_backend_test.py b/tests/python/unittest/onnx_backend_test.py
new file mode 100644
index 000000000000..155b835c28b9
--- /dev/null
+++ b/tests/python/unittest/onnx_backend_test.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""onnx test backend wrapper"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import unittest
+
+import onnx.backend.test
+#from onnx_test_utils 
+import backend as mxnet_backend
+
+# This is a pytest magic variable to load extra plugins
+pytest_plugins = "onnx.backend.test.report",
+
+backend_test = onnx.backend.test.BackendTest(mxnet_backend, __name__)
+
+# Test-name patterns for the operators whose ONNX backend tests are enabled
+implemented_operators = [
+    'test_abs*',
+    'test_add*',
+    'test_neg*',
+    'test_relu*',
+    'test_reshape_*',
+    'test_sqrt*',
+    'test_sub*',
+    'test_sum*',
+    'test_div*',
+    'test_tanh*',
+    'test_exp*',
+    'test_floor*'
+    ]
+
+for op_test in implemented_operators:
+    backend_test.include(op_test)
+
+# import all test cases at global scope to make them visible to python.unittest
+globals().update(backend_test
+                 .enable_report()
+                 .test_cases)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/python/unittest/test_super_resolution.py b/tests/python/unittest/test_super_resolution.py
new file mode 100644
index 000000000000..0fdfa63a63d6
--- /dev/null
+++ b/tests/python/unittest/test_super_resolution.py
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Testing super_resolution model conversion"""
+from __future__ import absolute_import as _abs
+from __future__ import print_function
+from collections import namedtuple
+import mxnet as mx
+from mxnet.test_utils import download
+import mxnet.contrib.onnx._import as onnx_mxnet
+import numpy as np
+from PIL import Image
+
+model_url = 'https://s3.amazonaws.com/onnx-mxnet/examples/super_resolution.onnx'
+
+download(model_url, 'super_resolution.onnx')
+
+print("Converting onnx format to mxnet's symbol and params...")
+sym, params = onnx_mxnet.import_model('super_resolution.onnx')
+
+# Download the test image, resize it to the model input size and use its Y channel as input
+input_image_dim = 224
+output_image_dim = 672  # NOTE(review): not referenced anywhere below
+img_url = 'https://s3.amazonaws.com/onnx-mxnet/examples/super_res_input.jpg'
+download(img_url, 'super_res_input.jpg')
+img = Image.open('super_res_input.jpg').resize((input_image_dim, input_image_dim))
+img_ycbcr = img.convert("YCbCr")
+img_y, img_cb, img_cr = img_ycbcr.split()
+x = np.array(img_y)[np.newaxis, np.newaxis, :, :]
+
+# create an inference-only module bound to the shape of x and load the imported params
+mod = mx.mod.Module(symbol=sym, data_names=['input_0'], label_names=None)
+mod.bind(for_training=False, data_shapes=[('input_0', x.shape)])
+mod.set_params(arg_params=params, aux_params=None)
+
+# run inference
+Batch = namedtuple('Batch', ['data'])
+mod.forward(Batch([mx.nd.array(x)]))
+
+# Clip the predicted Y channel to 0..255, merge it with the resized Cb/Cr channels and save as RGB
+img_out_y = Image.fromarray(np.uint8(mod.get_outputs()[0][0][0].asnumpy().clip(0, 255)), mode='L')
+
+result_img = Image.merge(
+    "YCbCr", [img_out_y,
+              img_cb.resize(img_out_y.size, Image.BICUBIC),
+              img_cr.resize(img_out_y.size, Image.BICUBIC)]).convert("RGB")
+result_img.save("super_res_output.jpg")