This repository was archived by the owner on Nov 17, 2023. It is now read-only.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -29,3 +29,6 @@
[submodule "3rdparty/onnx-tensorrt"]
path = 3rdparty/onnx-tensorrt
url = https://github.com/onnx/onnx-tensorrt.git
[submodule "3rdparty/ngraph-mxnet-bridge"]
path = 3rdparty/ngraph-mxnet-bridge
url = https://github.com/NervanaSystems/ngraph-mxnet-bridge
1 change: 1 addition & 0 deletions 3rdparty/ngraph-mxnet-bridge
Submodule ngraph-mxnet-bridge added at 9af5ed
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -28,6 +28,7 @@ mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could se
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
mxnet_option(USE_LAPACK "Build with lapack support" ON)
mxnet_option(USE_NGRAPH "Build with nGraph support" OFF)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLML_MKL "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE))
mxnet_option(USE_MKLDNN "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
@@ -230,7 +231,6 @@ if(ENABLE_TESTCOVERAGE)
if(NOT GCOV_PATH)
message(FATAL_ERROR "gcov not found! Aborting...")
endif() # NOT GCOV_PATH

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} --coverage")
35 changes: 26 additions & 9 deletions Makefile
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

.DEFAULT_GOAL := all
ROOTDIR = $(CURDIR)
TPARTYDIR = $(ROOTDIR)/3rdparty

@@ -78,13 +79,17 @@ ifeq ($(USE_MKLDNN), 1)
MKLDNNROOT = $(ROOTDIR)/3rdparty/mkldnn/build/install
MKLROOT = $(ROOTDIR)/3rdparty/mkldnn/build/install
export USE_MKLML = 1
MKLDNN_INCLUDE_DIR = $(MKLDNNROOT)/include
MKLDNN_LIB_DIR = $(MKLDNNROOT)/lib
endif

include $(TPARTYDIR)/mshadow/make/mshadow.mk
include $(DMLC_CORE)/make/dmlc.mk

# all tge possible warning tread
WARNFLAGS= -Wall -Wsign-compare
include 3rdparty/ngraph-mxnet-bridge/ngraph.mk

# all the possible warning flags
WARNFLAGS= -Wall -Wsign-compare -Wno-comment
CFLAGS = -DMSHADOW_FORCE_STREAM $(WARNFLAGS)

ifeq ($(DEV), 1)
@@ -101,6 +106,10 @@ endif
CFLAGS += -I$(TPARTYDIR)/mshadow/ -I$(TPARTYDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(TPARTYDIR)/tvm/include -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)

ifeq ($(USE_NGRAPH),1)
CFLAGS += $(NGRAPH_CFLAGS)
endif

ifeq ($(ENABLE_TESTCOVERAGE), 1)
CFLAGS += --coverage
LDFLAGS += --coverage
@@ -411,6 +420,10 @@ else
EXTRA_CUOBJ =
endif

ifeq ($(USE_NGRAPH), 1)
EXTRA_OBJ += $(NGRAPH_BRIDGE_OBJ)
endif

# plugin
PLUGIN_OBJ =
PLUGIN_CUOBJ =
@@ -473,23 +486,23 @@ endif
# For quick compile test, used smaller subset
ALLX_DEP= $(ALL_DEP)

build/src/%.o: src/%.cc | mkldnn
build/src/%.o: src/%.cc | mkldnn ngraph
@mkdir -p $(@D)
$(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@

build/src/%_gpu.o: src/%.cu | mkldnn
build/src/%_gpu.o: src/%.cu | mkldnn ngraph
@mkdir -p $(@D)
$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" --generate-dependencies -MT build/src/$*_gpu.o $< >build/src/$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

# A nvcc bug cause it to generate "generic/xxx.h" dependencies from torch headers.
# Use CXX to generate dependency instead.
build/plugin/%_gpu.o: plugin/%.cu
build/plugin/%_gpu.o: plugin/%.cu | ngraph
@mkdir -p $(@D)
$(CXX) -std=c++11 $(CFLAGS) -MM -MT build/plugin/$*_gpu.o $< >build/plugin/$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) $(CUDA_ARCH) -Xcompiler "$(CFLAGS)" $<

build/plugin/%.o: plugin/%.cc | mkldnn
build/plugin/%.o: plugin/%.cc | mkldnn ngraph
@mkdir -p $(@D)
$(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@

@@ -515,7 +528,9 @@ lib/libmxnet.a: $(ALLX_DEP)

lib/libmxnet.so: $(ALLX_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) $(LDFLAGS) \
$(CXX) $(CFLAGS) -shared -o $@ $(filter-out %libnnvm.a, $(filter %.o %.a, $^)) \
$(NGRAPH_LDFLAGS_FOR_SHARED_LIBS) \
$(LDFLAGS) \
-Wl,${WHOLE_ARCH} $(filter %libnnvm.a, $^) -Wl,${NO_WHOLE_ARCH}
ifeq ($(USE_MKLDNN), 1)
ifeq ($(UNAME_S), Darwin)
@@ -544,7 +559,9 @@ bin/im2rec: tools/im2rec.cc $(ALLX_DEP)

$(BIN) :
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) \
$(LDFLAGS) \
$(NGRAPH_LDFLAGS_FOR_PROGS_IN_BIN)

# CPP Package
ifeq ($(USE_CPP_PACKAGE), 1)
@@ -656,7 +673,7 @@ clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
$(RM) -r $(patsubst %, %/*.o, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.o, $(EXTRA_OPERATORS))
else
clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
clean: rclean ngraph_clean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
55 changes: 55 additions & 0 deletions NGRAPH_README.md
@@ -0,0 +1,55 @@
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->

<!--- http://www.apache.org/licenses/LICENSE-2.0 -->

<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->

# nGraph - MXNet Integration
MXNet nGraph integration is based on the [Unified integration with external backend libraries](https://cwiki.apache.org/confluence/display/MXNET/Unified+integration+with+external+backend+libraries) design.

After building MXNet with nGraph support, users can enable the nGraph backend by setting the `MXNET_SUBGRAPH_BACKEND="ngraph"` environment variable.

Gluon support is experimental and may or may not yield good performance. Gluon-nGraph
integration can be enabled by setting the environment variable `MXNET_NGRAPH_GLUON=1`.
Comment thread (resolved):

Contributor: Why is an additional env var needed for Gluon? How different is it from `MXNET_SUBGRAPH_BACKEND`?

Contributor: We use the `MXNET_NGRAPH_GLUON` env var to enable or disable MXNet imperative/Gluon execution support in the nGraph integration. It is not related to (and does not affect) `MXNET_SUBGRAPH_BACKEND`.
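
As a minimal sketch of how these two variables fit together (assuming an nGraph-enabled build; the small network and shapes below are purely illustrative):

```python
import os

# Select the nGraph subgraph backend; this is read when a graph is bound.
os.environ["MXNET_SUBGRAPH_BACKEND"] = "ngraph"
# Optional and experimental: also route Gluon/imperative execution through nGraph.
os.environ["MXNET_NGRAPH_GLUON"] = "1"

import mxnet as mx

# A small symbolic model; partitioning into nGraph subgraphs happens at bind time.
data = mx.sym.Variable("data")
net = mx.sym.FullyConnected(data, num_hidden=64, name="fc1")
net = mx.sym.Activation(net, act_type="relu", name="relu1")
net = mx.sym.FullyConnected(net, num_hidden=10, name="fc2")

executor = net.simple_bind(ctx=mx.cpu(), data=(32, 128))
out = executor.forward(data=mx.nd.random.uniform(shape=(32, 128)))[0]
print(out.shape)  # (32, 10)
```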


## Building with nGraph support
MXNet's experimental support for the Intel nGraph graph compiler can be enabled
through MXNet's build system. Current support is limited to Linux-based operating
systems; Mac and Windows support will be added in future releases.

When building MXNet with experimental nGraph integration enabled, MXNet's build
system builds its own copy of the nGraph-supplied libraries. Upon successful
completion of an nGraph-enabled build, these libraries and related symbolic links
can be found in the same build directory as `libmxnet.so`.
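
After a successful build, a quick sanity check from Python (a sketch; it assumes the MXNet Python package can already locate `libmxnet.so`, and the library names come from this PR's Jenkins artifact list, e.g. `libngraph.so`, `libcpu_backend.so`, `libtbb.so.2`):

```python
import os
from mxnet import libinfo

# Directory holding libmxnet.so; the nGraph libraries land alongside it.
libdir = os.path.dirname(libinfo.find_lib_path()[0])
found = sorted(f for f in os.listdir(libdir)
               if "ngraph" in f or "cpu_backend" in f or "tbb" in f)
print(found)  # expect e.g. ['libcpu_backend.so', 'libngraph.so', 'libtbb.so.2']
```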

If building with GNU Make, use the command:

`make -j USE_NGRAPH=1`

If building with CMake, use the command:

`mkdir build && cd build && cmake ../ -DUSE_NGRAPH=1 && make -j`

## Runtime environment variables
Some environment variables influence the behavior of the
nGraph-enabled MXNet software and supporting libraries. Here is a partial list of those variables:

| Variable | Description |
| :-------- | :---------- |
| `OMP_NUM_THREADS` | Suggested value: `16`. For more information please see [here](https://software.intel.com/en-us/mkl-windows-developer-guide-setting-the-number-of-threads-using-an-openmp-environment-variable) |
| `KMP_AFFINITY` | Suggested value: `granularity=fine,compact,1,0`. For more information please see [here](https://software.intel.com/en-us/node/522691). |
| `MXNET_NGRAPH_VERBOSE_GRAPH` | When set to `1`, nGraph-enabled MXNet will create in the current directory a JSON file representing each subgraph being compiled by the nGraph library. Each of these JSON files is a graph serialization that can be loaded by nGraph's `ngraph::deserialize` functions. |
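
For example, the suggested settings can be applied from a Python entry point before MXNet is imported (a sketch; 16 threads is only a starting point and should be tuned to the machine's physical core count):

```python
import os

# Suggested values from the table above.
os.environ["OMP_NUM_THREADS"] = "16"
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
# Dump each nGraph-compiled subgraph as a JSON serialization for inspection.
os.environ["MXNET_NGRAPH_VERBOSE_GRAPH"] = "1"

import mxnet as mx  # import after setting the variables so the runtimes see them
```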

## Supported nGraph back-ends
The nGraph library supports a number of hardware and software backends, including `"CPU"`, `"INTERPRETER"` (reference kernels), `"GPU"`, and `"IntelGPU"`. The current experimental integration enables the `"CPU"` backend by default. More backends will be supported in future releases.
3 changes: 2 additions & 1 deletion amalgamation/amalgamation.py
@@ -30,7 +30,8 @@
'opencv2/opencv.hpp', 'sys/stat.h', 'sys/types.h', 'cuda.h', 'cuda_fp16.h', 'omp.h',
'onnx/onnx.pb.h', 'execinfo.h', 'packet/sse-inl.h', 'emmintrin.h', 'thrust/device_vector.h',
'cusolverDn.h', 'internal/concurrentqueue_internal_debug.h', 'relacy/relacy_std.hpp',
'relacy_shims.h', 'ittnotify.h', 'shared_mutex'
'relacy_shims.h', 'ittnotify.h', 'shared_mutex', 'ngraph/ngraph.hpp', 'ngraph_imperative.h',
'ngraph_nnvm_utils.h',
]

minimum = int(sys.argv[6]) if len(sys.argv) > 5 else 0
22 changes: 22 additions & 0 deletions ci/docker/runtime_functions.sh
@@ -564,6 +564,18 @@ build_ubuntu_cpu_mkldnn_mkl() {
-j$(nproc)
}

build_ubuntu_cpu_ngraph() {
set -ex

build_ccache_wrappers

make \
ENABLE_TESTCOVERAGE=1 \
USE_BLAS=openblas \
USE_NGRAPH=1 \
-j$(nproc)
}

build_ubuntu_gpu() {
build_ubuntu_gpu_cuda91_cudnn7
}
@@ -834,6 +846,16 @@ unittest_ubuntu_tensorrt_gpu() {
nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_trt_gpu.xml --verbose --nocapture tests/python/tensorrt/
}

unittest_ubuntu_cpu_ngraph() {
set -ex
export MXNET_SUBGRAPH_BACKEND="ngraph"
export PYTHONPATH=./python/
export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest -e "test_monitor" -e "test_op_output_names_monitor" -e "test_op_all_names_monitor" -e "test_zero_prop"
nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_ngraph.xml --verbose tests/python/ngraph
}

# quantization gpu currently only runs on P3 instances
# need to separte it from unittest_ubuntu_python2_gpu()
unittest_ubuntu_python2_quantization_gpu() {
33 changes: 33 additions & 0 deletions ci/jenkins/Jenkins_steps.groovy
@@ -34,6 +34,7 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/li
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_ngraph_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, lib/libcpu_backend.so, lib/libngraph.so, lib/libtbb.so.2, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_tensorrt_lib = 'build/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*'
mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*'
@@ -119,6 +120,20 @@ def compile_unix_openblas_debug_cpu() {
}]
}

def compile_unix_ngraph_cpu() {
return ['CPU: NGRAPH': {
node(NODE_LINUX_CPU) {
ws('workspace/build-ngraph-cpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
utils.docker_run('ubuntu_cpu', 'build_ubuntu_cpu_ngraph', false)
utils.pack_lib('ngraph_cpu', mx_ngraph_lib, true)
}
}
}
}]
}

def compile_unix_mkl_cpu() {
return ['CPU: MKL': {
node(NODE_LINUX_CPU) {
@@ -717,6 +732,24 @@ def test_unix_python2_mkldnn_cpu() {
}]
}

def test_unix_python3_ngraph_cpu() {
return ['Python3: nGraph-CPU': {
node(NODE_LINUX_CPU) {
ws('workspace/build-ngraph-cpu') {
timeout(time: max_time, unit: 'MINUTES') {
try {
utils.unpack_and_init('ngraph_cpu', mx_ngraph_lib, true)
utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_ngraph', false)
utils.publish_test_coverage()
} finally {
utils.collect_test_results_unix('nosetests_unittest.xml', 'nosetests_python3_ngraph_cpu.xml')
}
}
}
}
}]
}

def test_unix_python3_mkldnn_cpu() {
return ['Python3: MKLDNN-CPU': {
node(NODE_LINUX_CPU) {
2 changes: 2 additions & 0 deletions ci/jenkins/Jenkinsfile_unix_cpu
@@ -37,6 +37,7 @@ core_logic: {
custom_steps.compile_unix_cpu_openblas(),
custom_steps.compile_unix_openblas_debug_cpu(),
custom_steps.compile_unix_mkl_cpu(),
custom_steps.compile_unix_ngraph_cpu(),
custom_steps.compile_unix_mkldnn_cpu(),
custom_steps.compile_unix_mkldnn_mkl_cpu()
])
@@ -48,6 +49,7 @@ core_logic: {
custom_steps.test_unix_python3_mkl_cpu(),
custom_steps.test_unix_python2_mkldnn_cpu(),
custom_steps.test_unix_python3_mkldnn_cpu(),
custom_steps.test_unix_python3_ngraph_cpu(),
custom_steps.test_unix_python3_mkldnn_mkl_cpu(),
custom_steps.test_unix_scala_cpu(),
custom_steps.test_unix_scala_mkldnn_cpu(),
4 changes: 4 additions & 0 deletions make/config.mk
@@ -92,6 +92,7 @@ USE_OPENCV = 1

#whether use libjpeg-turbo for image decode without OpenCV wrapper
USE_LIBJPEG_TURBO = 0

#add the path to libjpeg-turbo library
USE_LIBJPEG_TURBO_PATH = NONE

@@ -103,6 +104,9 @@ USE_OPENMP = 1
# you can disable it explicity with USE_MKLDNN = 0
USE_MKLDNN =

# whether to use the nGraph library
USE_NGRAPH = 0

# whether use NNPACK library
USE_NNPACK = 0

7 changes: 7 additions & 0 deletions src/c_api/c_api_symbolic.cc
@@ -32,6 +32,9 @@
#include "../operator/operator_common.h"
#include "../executor/exec_pass.h"
#include "../operator/subgraph/subgraph_property.h"
#if MXNET_USE_NGRAPH == 1
#include <ngraph_imperative.h>
#endif

namespace mxnet {
namespace op {
@@ -76,6 +79,10 @@ int MXListAllOpNames(nn_uint *out_size,
const char ***out_array) {
mxnet::op::RegisterLegacyOpProp();
mxnet::op::RegisterLegacyNDFunc();
#if MXNET_USE_NGRAPH == 1
// ngraph imperative interface
ngraph_bridge::InitImperative();
#endif
return NNListAllOpNames(out_size, out_array);
}

11 changes: 9 additions & 2 deletions src/executor/graph_executor.cc
@@ -1532,6 +1532,7 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src,
g = InferForwardAttrs(g, arg_shapes, arg_dtypes, arg_stypes, default_ctx,
ctx_map, in_arg_ctxes, aux_state_ctxes);
subgraph_prop->SetAttr("graph", g);
subgraph_prop->SetAttr("grad_reqs", grad_req_types);
auto it = op::SubgraphPropertyOpNameSet::Get()->find(prop_name);
// assign a op name set to the subgraph property if it has been provided by users
if (it != op::SubgraphPropertyOpNameSet::Get()->end()) {
@@ -1661,10 +1662,12 @@ Executor *Executor::SimpleBind(nnvm::Symbol symbol,
std::unordered_map<std::string, NDArray>* shared_buffer,
Executor* shared_exec) {
auto exec = new exec::GraphExecutor();
if (!exec->subgraph_property().empty()) {
if (!exec->subgraph_property().empty() && group2ctx.empty()) {
symbol = exec::PartitionGraph(symbol, exec->subgraph_property(), arg_shape_map, arg_dtype_map,
arg_stype_map, default_ctx, group2ctx, in_arg_ctxes,
aux_state_ctxes, grad_req_types);
} else if (!group2ctx.empty()) {
LOG(WARNING) << "MXNET_SUBGRAPH_BACKEND does not currently support heterogeneous execution";
}
exec->Init(symbol, default_ctx, group2ctx,
in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes,
@@ -1686,8 +1689,12 @@ Executor *Executor::Bind(nnvm::Symbol symbol,
auto exec = new exec::GraphExecutor();
std::vector<NDArray> tmp_in_args = in_args;
if (!exec->subgraph_property().empty()) {
symbol = exec::PartitionGraph(symbol, exec->subgraph_property(), &tmp_in_args, aux_states,
if (group2ctx.empty()) {
symbol = exec::PartitionGraph(symbol, exec->subgraph_property(), &tmp_in_args, aux_states,
default_ctx, group2ctx, grad_req_type);
} else {
LOG(WARNING) << "MXNET_SUBGRAPH_BACKEND does not currently support heterogeneous execution";
}
}
exec->Init(symbol, default_ctx, group2ctx,
tmp_in_args, arg_grad_store, grad_req_type, aux_states,
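
The `graph_executor.cc` change above makes subgraph partitioning mutually exclusive with a group-to-context map. A hedged Python sketch of that interaction (the tiny model is illustrative only; the quoted warning matches the new `LOG(WARNING)` lines):

```python
import os

os.environ["MXNET_SUBGRAPH_BACKEND"] = "ngraph"

import mxnet as mx

data = mx.sym.Variable("data")
net = mx.sym.FullyConnected(data, num_hidden=10, name="fc")

# A non-empty group2ctx map requests heterogeneous execution, so the graph is
# bound without nGraph partitioning and MXNet logs:
#   "MXNET_SUBGRAPH_BACKEND does not currently support heterogeneous execution"
executor = net.simple_bind(ctx=mx.cpu(), data=(4, 8),
                           group2ctx={"stage1": mx.cpu(0)})
```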