From c61a25f0ed83bd2cf0df67c9cce8901f1b04ed49 Mon Sep 17 00:00:00 2001
From: mozga-intel
Date: Thu, 4 Nov 2021 09:00:39 +0100
Subject: [PATCH 01/10] [SRC] Re-format .cc .h files

---
 src/api/_api_internal/_api_internal.cc        |   4 +-
 src/api/operator/numpy/np_tri_op.cc           |   6 +-
 src/c_api/c_api.cc                            |   6 +-
 src/c_api/c_api_ndarray.cc                    |   3 +-
 src/c_api/c_api_symbolic.cc                   |   6 +-
 src/common/cuda/nvtx.h                        |  19 +-
 src/common/cuda/utils.h                       |   4 +-
 src/common/utils.h                            |   4 +-
 src/engine/naive_engine.cc                    |   4 +-
 src/engine/threaded_engine.h                  |   2 +-
 src/engine/threaded_engine_perdevice.cc       |  10 +-
 src/engine/threaded_engine_pooled.cc          |   6 +-
 src/imperative/attach_op_resource_pass.cc     |   5 +-
 src/imperative/exec_pass.h                    |   2 +-
 src/imperative/imperative.cc                  |  19 +-
 src/imperative/imperative_utils.h             |  10 +-
 src/io/iter_prefetcher.h                      |   6 +-
 src/kvstore/comm.h                            |  26 +-
 src/kvstore/gpu_topology.h                    |   4 +-
 src/kvstore/kvstore_dist.h                    |  12 +-
 src/kvstore/p3store_dist.h                    |   2 +-
 src/ndarray/ndarray.cc                        |  16 +-
 src/nnvm/gradient.cc                          |  10 +-
 src/nnvm/plan_memory.cc                       |   6 +-
 src/operator/contrib/adamw.cu                 |   2 +-
 src/operator/contrib/bilinear_resize-inl.h    |  16 +-
 src/operator/contrib/bounding_box-inl.h       |  30 +-
 src/operator/contrib/bounding_box.cu          |   6 +-
 .../contrib/deformable_psroi_pooling.cc       |  40 +-
 .../contrib/deformable_psroi_pooling.cu       |  40 +-
 .../contrib/intgemm/prepare_weight_op.cc      |   6 +-
 src/operator/contrib/multi_lamb.cc            |   8 +-
 src/operator/contrib/multi_lamb.cu            |  12 +-
 src/operator/contrib/multi_lans.cc            |   8 +-
 src/operator/contrib/multi_lans.cu            |  12 +-
 src/operator/contrib/multi_lars-inl.h         |   8 +-
 src/operator/control_flow.cc                  |   6 +-
 src/operator/correlation.cc                   |  12 +-
 src/operator/leaky_relu.cc                    |  12 +-
 src/operator/mxnet_op.h                       |  56 +-
 src/operator/nn/batch_norm-inl.h              |  10 +-
 src/operator/nn/batch_norm.cu                 |  32 +-
 src/operator/nn/concat.cc                     |   4 +-
 src/operator/nn/convolution.cc                |  36 +-
 src/operator/nn/cudnn/cudnn_batch_norm.cu     | 186 ++--
 src/operator/nn/cudnn/cudnn_batch_norm.h      |  14 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h | 831 +++++++++++++++++
 .../nn/cudnn/cudnn_deconvolution-inl.h        | 852 ++++++++++++++++++
 src/operator/nn/cudnn/cudnn_pooling-inl.h     |  48 +-
 src/operator/nn/dnnl/dnnl_base-inl.h          |   6 +-
 src/operator/nn/dnnl/dnnl_base.cc             |  41 +-
 src/operator/nn/dnnl/dnnl_convolution.cc      |   4 +-
 src/operator/nn/dnnl/dnnl_deconvolution-inl.h |  10 +-
 src/operator/nn/dnnl/dnnl_fully_connected.cc  |   6 +-
 src/operator/nn/dnnl/dnnl_rnn.cc              |  41 +-
 src/operator/nn/pooling-inl.h                 |  12 +-
 src/operator/nn/pooling.cc                    |  33 +-
 src/operator/nn/softmax-inl.h                 |  30 +-
 src/operator/nn/softmax.cc                    |   6 +-
 src/operator/npx_control_flow.cc              |   6 +-
 src/operator/numpy/linalg/np_lstsq.cc         |   6 +-
 src/operator/numpy/linalg/np_norm.cc          |   4 +-
 src/operator/numpy/np_bincount_op.cc          |   6 +-
 src/operator/numpy/np_boolean_mask_assign.cc  |   6 +-
 .../numpy/np_broadcast_reduce_op_value.h      |   6 +-
 src/operator/numpy/np_delete_op-inl.h         |   8 +-
 src/operator/numpy/np_delete_op.cc            |   6 +-
 src/operator/numpy/np_einsum_op-inl.h         |   4 +-
 .../numpy/np_elemwise_broadcast_logic_op.h    |  21 +-
 src/operator/numpy/np_elemwise_broadcast_op.h |   4 +-
 .../numpy/np_elemwise_broadcast_op_add.cc     |  37 +-
 .../numpy/np_elemwise_broadcast_op_add.cu     |   5 +-
 .../numpy/np_elemwise_broadcast_op_mod.cc     |  37 +-
 .../numpy/np_elemwise_broadcast_op_mod.cu     |   5 +-
 .../numpy/np_elemwise_broadcast_op_mul.cc     |  37 +-
 .../numpy/np_elemwise_broadcast_op_mul.cu     |   4 +-
 .../numpy/np_elemwise_broadcast_op_pow.cc     |  38 +-
 .../numpy/np_elemwise_broadcast_op_pow.cu     |   5 +-
 .../numpy/np_elemwise_broadcast_op_scalar.cc  |  32 +-
 .../numpy/np_elemwise_broadcast_op_scalar.cu  |  16 +-
 .../numpy/np_elemwise_broadcast_op_sub.cc     |  37 +-
 .../numpy/np_elemwise_broadcast_op_sub.cu     |   4 +-
 src/operator/numpy/np_insert_op_scalar-inl.h  |   6 +-
 src/operator/numpy/np_insert_op_slice-inl.h   |   6 +-
 src/operator/numpy/np_insert_op_tensor-inl.h  |   6 +-
 src/operator/numpy/np_interp_op.cc            |   6 +-
 src/operator/numpy/np_moments_op.cc           |   6 +-
 src/operator/numpy/np_percentile_op.cc        |   6 +-
 src/operator/numpy/np_true_divide.cc          |   6 +-
 src/operator/numpy/np_unique_op.cc            |   7 +-
 src/operator/numpy/random/np_bernoulli_op.cc  |   6 +-
 .../numpy/random/np_exponential_op.cc         |   6 +-
 src/operator/numpy/random/np_pareto_op.cc     |   6 +-
 src/operator/numpy/random/np_power_op.cc      |   6 +-
 src/operator/numpy/random/np_rayleigh_op.cc   |   6 +-
 src/operator/numpy/random/np_weibull_op.cc    |   6 +-
 src/operator/optimizer_op-inl.h               |   6 +-
 src/operator/optimizer_op.cc                  |   4 +-
 src/operator/optimizer_op.cu                  |   4 +-
 src/operator/random/sampler.h                 |   8 +-
 src/operator/random/shuffle_op.cu             |   4 +-
 src/operator/sequence_last-inl.h              |  12 +-
 src/operator/subgraph/build_subgraph.cc       |   6 +-
 src/operator/subgraph/dnnl/dnnl_conv.cc       |  15 +-
 src/operator/subgraph/dnnl/dnnl_fc.cc         |   4 +-
 .../subgraph/tensorrt/nnvm_to_onnx.cc         |   2 +-
 .../subgraph/tensorrt/onnx_to_tensorrt.h      |  10 +-
 src/operator/subgraph/tensorrt/tensorrt-inl.h |   2 +-
 src/operator/tensor/amp_cast.cc               |  12 +-
 src/operator/tensor/broadcast_reduce-inl.h    |   6 +-
 src/operator/tensor/dot-inl.h                 |  12 +-
 src/operator/tensor/elemwise_binary_op-inl.h  |  16 +-
 .../tensor/elemwise_binary_scalar_op.h        |   4 +-
 src/operator/tensor/histogram.cc              |   6 +-
 src/operator/tensor/la_op-inl.h               |  20 +-
 src/operator/tensor/la_op.h                   |  12 +-
 src/operator/tensor/matrix_op.cu              |  10 +-
 src/operator/tensor/reduce_rtc.cc             |  12 +-
 src/operator/tensor/square_sum.cc             |   2 +-
 src/operator/tensor/square_sum.cu             |   2 +-
 src/profiler/aggregate_stats.cc               |  12 +-
 src/runtime/container.cc                      |   4 +-
 src/serialization/cnpy.cc                     |   8 +-
 src/storage/pooled_storage_manager.h          |   2 +-
 124 files changed, 2520 insertions(+), 784 deletions(-)
 create mode 100644 src/operator/nn/cudnn/cudnn_convolution-inl.h
 create mode 100644 src/operator/nn/cudnn/cudnn_deconvolution-inl.h

diff --git a/src/api/_api_internal/_api_internal.cc b/src/api/_api_internal/_api_internal.cc
index dc0dac811037..82d86d105065 100644
--- a/src/api/_api_internal/_api_internal.cc
+++ b/src/api/_api_internal/_api_internal.cc
@@ -62,8 +62,8 @@ MXNET_REGISTER_GLOBAL("_ADT").set_body([](runtime::MXNetArgs args, runtime::MXNe
       ObjectRef input = NDArrayHandle(array);
       data.push_back(input);
     } else if (args[i].type_code() != kNull) {
-      ObjectRef input = String::CanConvertFrom(args[i]) ? args[i].operator String()
-                                                        : args[i].operator ObjectRef();
+      ObjectRef input = String::CanConvertFrom(args[i]) ? args[i].operator String() :
+                                                          args[i].operator ObjectRef();
       data.push_back(input);
     } else {
       data.emplace_back(nullptr);
diff --git a/src/api/operator/numpy/np_tri_op.cc b/src/api/operator/numpy/np_tri_op.cc
index 915c68ca4eb0..dacc8953bc43 100644
--- a/src/api/operator/numpy/np_tri_op.cc
+++ b/src/api/operator/numpy/np_tri_op.cc
@@ -39,9 +39,9 @@ MXNET_REGISTER_API("_npi.tri").set_body([](runtime::MXNetArgs args, runtime::MXN
     param.M = args[1].operator nnvm::dim_t();
   }
   param.k = args[2].operator int();
-  param.dtype = args[3].type_code() == kNull
-                    ? mshadow::kFloat32
-                    : String2MXNetTypeWithBool(args[3].operator std::string());
+  param.dtype = args[3].type_code() == kNull ?
+                    mshadow::kFloat32 :
+                    String2MXNetTypeWithBool(args[3].operator std::string());
   if (args[4].type_code() != kNull) {
     attrs.dict["ctx"] = args[4].operator std::string();
   }
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 8bb2b54bcc8d..d69db4eebe23 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -2822,8 +2822,8 @@ int MXDataIterGetLabel(DataIterHandle handle, NDArrayHandle* out) {
   // TODO(tianjun) make label 1D when label_width=0
   mxnet::TShape shape = no_label ? TShape({
                                        1,
-                                   })
-                                   : db.data[1].shape();
+                                   }) :
+                                   db.data[1].shape();
   if (no_label || shape.Size() < 1) {
     // it's possible that label is not available and not required
     // but we need to bypass the invalid copy
@@ -3947,7 +3947,7 @@ int MXShallowCopyNDArray(NDArrayHandle src_handle, NDArrayHandle* out) {
   API_END_HANDLE_ERROR(delete ret);
 }
 
-int MXNVTXRangePush(const char * name, mx_uint color) {
+int MXNVTXRangePush(const char* name, mx_uint color) {
   API_BEGIN();
 #if MXNET_USE_CUDA && MXNET_USE_NVTX
   mxnet::common::cuda::nvtx::gpuRangeStart(color, name);
diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc
index 13c200cd0dd6..2e9c0a373621 100644
--- a/src/c_api/c_api_ndarray.cc
+++ b/src/c_api/c_api_ndarray.cc
@@ -334,8 +334,7 @@ int MXAutogradMarkVariables(uint32_t num_var,
   API_END();
 }
 
-int MXAutogradDropGrads(uint32_t num_var,
-                        NDArrayHandle *var_handles) {
+int MXAutogradDropGrads(uint32_t num_var, NDArrayHandle* var_handles) {
   API_BEGIN();
   std::vector<NDArray*> variables;
   variables.reserve(num_var);
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index 1e12b3f6b46d..82cccd879511 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -1195,9 +1195,9 @@ int MXGenBackendSubgraph(SymbolHandle sym_handle,
     const auto& subgraph_prop_list = backend->GetSubgraphProperties();
     for (auto property : subgraph_prop_list) {
       if (property->HasAttr("disable") && property->GetAttr<bool>("disable") == true) {
-        auto full_name = property->HasAttr("property_name")
-                             ? property->GetAttr<std::string>("property_name")
-                             : std::string();
+        auto full_name = property->HasAttr("property_name") ?
+ property->GetAttr("property_name") : + std::string(); LOG(INFO) << "subgraph property " << full_name << " from backend " << backend_name << " is disabled."; continue; diff --git a/src/common/cuda/nvtx.h b/src/common/cuda/nvtx.h index 4142ee112f1e..ae67c623fe41 100644 --- a/src/common/cuda/nvtx.h +++ b/src/common/cuda/nvtx.h @@ -34,8 +34,7 @@ namespace cuda { class NVTXDuration { public: - explicit NVTXDuration(const char *name) noexcept - : range_id_(0), name_(name) {} + explicit NVTXDuration(const char* name) noexcept : range_id_(0), name_(name) {} inline void start() { range_id_ = nvtxRangeStartA(name_); @@ -47,7 +46,7 @@ class NVTXDuration { private: nvtxRangeId_t range_id_; - const char *name_; + const char* name_; }; // Utility class for NVTX @@ -68,19 +67,19 @@ class nvtx { static void gpuRangeStart(const uint32_t rgb, const std::string& range_name) { nvtxEventAttributes_t att; - att.version = NVTX_VERSION; - att.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - att.colorType = NVTX_COLOR_ARGB; - att.color = rgb | 0xff000000; - att.messageType = NVTX_MESSAGE_TYPE_ASCII; + att.version = NVTX_VERSION; + att.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + att.colorType = NVTX_COLOR_ARGB; + att.color = rgb | 0xff000000; + att.messageType = NVTX_MESSAGE_TYPE_ASCII; att.message.ascii = range_name.c_str(); nvtxRangePushEx(&att); } // Utility to map a range name prefix to a random color based on its hash static uint32_t nameToColor(const std::string& range_name, int prefix_len) { - static std::vector colors{kRed, kGreen, kBlue, kYellow, kOrange, kRed1, kMagenta, - kViolet, kBlue1, kCyan, kGreen1}; + static std::vector colors{ + kRed, kGreen, kBlue, kYellow, kOrange, kRed1, kMagenta, kViolet, kBlue1, kCyan, kGreen1}; std::string s(range_name, 0, prefix_len); std::hash hash_fn; return colors[hash_fn(s) % colors.size()]; diff --git a/src/common/cuda/utils.h b/src/common/cuda/utils.h index 0290fabe7aec..35330c445396 100644 --- a/src/common/cuda/utils.h +++ b/src/common/cuda/utils.h @@ -739,8 +739,8 @@ static inline __device__ void atomicAdd(mshadow::half::half_t* address, mshadow: mshadow::half::half_t hsum; hsum.half_ = reinterpret_cast(address) & 2 ? (old >> 16) : (old & 0xffff); hsum += val; - old = reinterpret_cast(address) & 2 ? (old & 0xffff) | (hsum.half_ << 16) - : (old & 0xffff0000) | hsum.half_; + old = reinterpret_cast(address) & 2 ? (old & 0xffff) | (hsum.half_ << 16) : + (old & 0xffff0000) | hsum.half_; old = atomicCAS(address_as_ui, assumed, old); } while (assumed != old); } diff --git a/src/common/utils.h b/src/common/utils.h index 15e676c816c9..180295a14902 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -711,8 +711,8 @@ FCompType GetFCompute(const nnvm::Op* op, const std::string& name, const Context */ template constexpr size_t MaxIntegerValue() { - return std::is_integral::value ? std::numeric_limits::max() - : size_t(2) << (std::numeric_limits::digits - 1); + return std::is_integral::value ? std::numeric_limits::max() : + size_t(2) << (std::numeric_limits::digits - 1); } template <> diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc index ad24af1dabe9..25841e072cda 100644 --- a/src/engine/naive_engine.cc +++ b/src/engine/naive_engine.cc @@ -254,8 +254,8 @@ class NaiveEngine final : public Engine { #endif /*! * \brief Holding a shared_ptr to the object pool to prevent it from being destructed too early - * See also #309 (https://github.com/apache/mxnet/issues/309) and similar fix in threaded_engine.h. 
- * Without this, segfaults seen on CentOS7 in + * See also #309 (https://github.com/apache/mxnet/issues/309) and similar fix in + * threaded_engine.h. Without this, segfaults seen on CentOS7 in * test_operator_gpu.py:test_convolution_multiple_streams */ std::shared_ptr > objpool_opr_ref_; diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h index a9e08a80aadc..4aebd08a6efb 100644 --- a/src/engine/threaded_engine.h +++ b/src/engine/threaded_engine.h @@ -368,7 +368,7 @@ class ThreadedEngine : public Engine { new profiler::ProfileOperator(threaded_opr->opr_name.c_str(), attrs.release())); opr_block->opr_profile->startForDevice(ctx.dev_type, ctx.dev_id); } - const bool debug_info = (engine_info_ && debug_push_opr_ == opr_block); + const bool debug_info = (engine_info_ && debug_push_opr_ == opr_block); if (debug_info) { LOG(INFO) << "ExecuteOprBlock " << opr_block << "shutdown_phase=" << shutdown_phase_; } diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc index b566e4417a41..79e8eaa53909 100644 --- a/src/engine/threaded_engine_perdevice.cc +++ b/src/engine/threaded_engine_perdevice.cc @@ -311,12 +311,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine { while (task_queue->Pop(&opr_block)) { #if MXNET_USE_NVTX - auto nvtx_name = opr_block->opr->opr_name != "" ? opr_block->opr->opr_name : "Op"; - auto end_pos = nvtx_name.find('{'); - auto name_prefix_len = end_pos != std::string::npos - ? end_pos - : nvtx_name.size(); - auto color = common::cuda::nvtx::nameToColor(nvtx_name, name_prefix_len); + auto nvtx_name = opr_block->opr->opr_name != "" ? opr_block->opr->opr_name : "Op"; + auto end_pos = nvtx_name.find('{'); + auto name_prefix_len = end_pos != std::string::npos ? end_pos : nvtx_name.size(); + auto color = common::cuda::nvtx::nameToColor(nvtx_name, name_prefix_len); common::cuda::nvtx::gpuRangeStart(color, nvtx_name); #endif auto* info = ThreadedEngine::GPUWorkerSyncInfo::New(); diff --git a/src/engine/threaded_engine_pooled.cc b/src/engine/threaded_engine_pooled.cc index 0ec91b23e260..fd29f6daacc3 100644 --- a/src/engine/threaded_engine_pooled.cc +++ b/src/engine/threaded_engine_pooled.cc @@ -66,7 +66,7 @@ class ThreadedEnginePooled : public ThreadedEngine { thread_pool_ = nullptr; io_thread_pool_ = nullptr; streams_->Finalize(); - streams_ = nullptr; + streams_ = nullptr; } void Stop() override { @@ -154,8 +154,8 @@ class ThreadedEnginePooled : public ThreadedEngine { } bool is_copy = (opr_block->opr->prop == FnProperty::kCopyFromGPU || opr_block->opr->prop == FnProperty::kCopyToGPU); - auto&& rctx = is_copy ? streams_->GetIORunContext(opr_block->ctx) - : streams_->GetRunContext(opr_block->ctx); + auto&& rctx = is_copy ? streams_->GetIORunContext(opr_block->ctx) : + streams_->GetRunContext(opr_block->ctx); #if MXNET_USE_CUDA CallbackOnStart on_start; CallbackOnComplete callback; diff --git a/src/imperative/attach_op_resource_pass.cc b/src/imperative/attach_op_resource_pass.cc index f4ac4b1257bc..17d6d7a41dc3 100644 --- a/src/imperative/attach_op_resource_pass.cc +++ b/src/imperative/attach_op_resource_pass.cc @@ -52,8 +52,9 @@ void AttachOpResources(const Graph& g, const bool rsc_req = (fresource.count(op) != 0); const bool rsc_ex_req = (fresource_ex.count(op) != 0); if (rsc_req || rsc_ex_req) { - auto reqs = rsc_ex_req ? fresource_ex[op](inode.source->attrs, dev_masks[nid], vdispatch[nid]) - : fresource[op](inode.source->attrs); + auto reqs = rsc_ex_req ? 
+ fresource_ex[op](inode.source->attrs, dev_masks[nid], vdispatch[nid]) : + fresource[op](inode.source->attrs); // Get the resource of temporal space. for (const ResourceRequest& req : reqs) { switch (req.type) { diff --git a/src/imperative/exec_pass.h b/src/imperative/exec_pass.h index acecd7080d2b..7667d97632fc 100644 --- a/src/imperative/exec_pass.h +++ b/src/imperative/exec_pass.h @@ -287,7 +287,7 @@ inline Graph MXGradient( std::string copy_op_str = std::string(), mxnet::ShapeVector in_arg_shapes = mxnet::ShapeVector(), DTypeVector in_arg_dtypes = DTypeVector(), - std::vector us = std::vector() ) { + std::vector us = std::vector()) { graph.attrs["grad_ys"] = std::make_shared(std::move(ys)); graph.attrs["grad_xs"] = std::make_shared(std::move(xs)); graph.attrs["grad_ys_out_grad"] = std::make_shared(std::move(ys_out_grad)); diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index af1ee097ac1e..b9bdaac9476f 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -161,7 +161,7 @@ void Imperative::MarkVariables(const std::vector& variables, } else { AGInfo& info = AGInfo::Get(variables[i]->autograd_entry_.node); CHECK_EQ(info.out_grads.size(), 0) - <<"The node has already been marked. Cannot mark it again."; + << "The node has already been marked. Cannot mark it again."; info.out_grads.emplace_back(gradients[i]->Detach()); info.grad_req = static_cast(grad_reqs[i]); info.ctx = variables[i]->ctx(); @@ -175,7 +175,7 @@ void Imperative::DropGrads(const std::vector& variables) { if (variable->autograd_entry_.node) { AGInfo& info = AGInfo::Get(variable->autograd_entry_.node); CHECK_NE(info.out_grads.size(), 0) - <<"The node has empty out_grads already. Cannot DropGrads again."; + << "The node has empty out_grads already. 
Cannot DropGrads again."; for (auto grad : info.out_grads) { grad.ReInit(); } @@ -188,8 +188,8 @@ void Imperative::DropGrads(const std::vector& variables) { void Imperative::GetBackwardDependency(const nnvm::ObjectPtr& node, uint32_t num_inputs, uint32_t num_outputs, - std::vector *p_save_inputs, - std::vector *p_save_outputs) { + std::vector* p_save_inputs, + std::vector* p_save_outputs) { static auto& fgradient = nnvm::Op::GetAttr("FGradient"); std::vector& save_inputs = *p_save_inputs; std::vector& save_outputs = *p_save_outputs; @@ -609,12 +609,11 @@ std::vector Imperative::Backward(const std::vector& outputs, arrays[eid] = x_grads[i - num_forward_outputs]; ref_count[eid] = 1; } - const std::vector& us_grads = - g_graph.GetAttr>("nleaf_grads"); + const std::vector& us_grads = g_graph.GetAttr>("nleaf_grads"); CHECK_EQ(us_grads.size(), us.size()) - << "Size of queried nleaf_vars and size of their gradients don't match."; + << "Size of queried nleaf_vars and size of their gradients don't match."; for (size_t i = 0; i < us_grads.size(); i++) { - size_t eid = idx.entry_id(us_grads[i]); + size_t eid = idx.entry_id(us_grads[i]); AGInfo& info = AGInfo::Get(us[i].node); if (arrays[eid]->dtype_ == -1) { arrays[eid] = &info.out_grads[0]; @@ -676,8 +675,8 @@ std::vector Imperative::Backward(const std::vector& outputs, array_reqs[eid] = x_reqs[i - num_forward_outputs]; } for (size_t i = 0; i < us_grads.size(); i++) { - size_t eid = idx.entry_id(us_grads[i]); - AGInfo& info = AGInfo::Get(us[i].node); + size_t eid = idx.entry_id(us_grads[i]); + AGInfo& info = AGInfo::Get(us[i].node); array_reqs[eid] = info.grad_req; } diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index b649958fa534..ce1a60fb2b20 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -353,8 +353,8 @@ inline void SetDependency(const nnvm::NodeAttrs& attrs, if (rsc_req || rsc_ex_req) { int ntmp = 0; auto resource_reqs = rsc_ex_req ? ftmp_resource_ex[attrs.op]( - attrs, static_cast(ctx.dev_mask()), dispatch_mode) - : ftmp_resource[attrs.op](attrs); + attrs, static_cast(ctx.dev_mask()), dispatch_mode) : + ftmp_resource[attrs.op](attrs); for (const auto& req : resource_reqs) { switch (req.type) { case ResourceRequest::kTempSpace: @@ -1318,9 +1318,9 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx, const auto& inode = idx[nid]; opr_names += op_name; opr_names += "{name=" + inode.source->attrs.name + ";"; - const std::unordered_map &dict = inode.source->attrs.dict; - auto num_dict_entries = dict.size(); - for (auto &k : dict) { + const std::unordered_map& dict = inode.source->attrs.dict; + auto num_dict_entries = dict.size(); + for (auto& k : dict) { opr_names += k.first + "=" + k.second; if (--num_dict_entries != 0) opr_names += ";"; diff --git a/src/io/iter_prefetcher.h b/src/io/iter_prefetcher.h index 91d70576bc9d..5f859b3d2bfe 100644 --- a/src/io/iter_prefetcher.h +++ b/src/io/iter_prefetcher.h @@ -87,9 +87,9 @@ class PrefetcherIter : public IIterator { (*dptr)->index.resize(batch.batch_size); for (size_t i = 0; i < batch.data.size(); ++i) { auto dtype = param_.dtype ? param_.dtype.value() : batch.data[i].type_flag_; - auto ctx = ((param_.ctx == PrefetcherParam::kCPUPinned) && (param_.device_id >= 0)) - ? Context::CPUPinned(param_.device_id) - : Context::CPU(); + auto ctx = ((param_.ctx == PrefetcherParam::kCPUPinned) && (param_.device_id >= 0)) ? 
+ Context::CPUPinned(param_.device_id) : + Context::CPU(); (*dptr)->data.at(i) = NDArray(batch.data[i].shape_, ctx, false, dtype); } } diff --git a/src/kvstore/comm.h b/src/kvstore/comm.h index 5a1df937f6eb..5fdb0e912103 100644 --- a/src/kvstore/comm.h +++ b/src/kvstore/comm.h @@ -206,9 +206,9 @@ class CommCPU : public Comm { Engine::CallbackOnComplete on_complete) { on_start(); NDArray out = buf_merged; - is_serial_push_ - ? ReduceSumCPUExSerial(reduce, &out) - : mxnet::ndarray::ElementwiseSum(rctx.get_stream(), rsc, reduce, &out); + is_serial_push_ ? + ReduceSumCPUExSerial(reduce, &out) : + mxnet::ndarray::ElementwiseSum(rctx.get_stream(), rsc, reduce, &out); on_complete(); }, Context::CPU(), @@ -263,10 +263,10 @@ class CommCPU : public Comm { const bool is_same_ctx = out->ctx() == src.ctx(); const bool is_diff_var = out->var() != src.var(); NDArray retained_cpu = - (is_same_ctx && is_diff_var) - ? *out - : NDArray( - kRowSparseStorage, src.shape(), src.ctx(), true, src.dtype(), src.aux_types()); + (is_same_ctx && is_diff_var) ? + *out : + NDArray( + kRowSparseStorage, src.shape(), src.ctx(), true, src.dtype(), src.aux_types()); if (!is_diff_var) { common::LogOnce("The output of row_sparse_pull() on key " + std::to_string(key) + "refers to the same NDArray as the one stored in KVStore." @@ -670,13 +670,11 @@ class CommDevice : public Comm { // retain according to indices const bool is_same_ctx = out->ctx() == src.ctx(); const bool is_diff_var = out->var() != src.var(); - NDArray retained_gpu = (is_same_ctx && is_diff_var) ? *out - : NDArray(kRowSparseStorage, - out->shape(), - src.ctx(), - true, - out->dtype(), - out->aux_types()); + NDArray retained_gpu = + (is_same_ctx && is_diff_var) ? + *out : + NDArray( + kRowSparseStorage, out->shape(), src.ctx(), true, out->dtype(), out->aux_types()); if (!is_diff_var) { common::LogOnce("The output of row_sparse_pull() on key " + std::to_string(key) + "refers to the same NDArray as the one stored in KVStore." diff --git a/src/kvstore/gpu_topology.h b/src/kvstore/gpu_topology.h index 319b04000da7..b21cac3c6d48 100644 --- a/src/kvstore/gpu_topology.h +++ b/src/kvstore/gpu_topology.h @@ -588,8 +588,8 @@ inline int KLGenerateBinaryTree(const std::vector& W, parent = (parent == -1) ? GetRoot(P, color, *roots) : parent; int from_cluster = color; - int dest_cluster = (from_cluster == (*cluster_pairs)[i].first) ? (*cluster_pairs)[i].second - : (*cluster_pairs)[i].first; + int dest_cluster = (from_cluster == (*cluster_pairs)[i].first) ? (*cluster_pairs)[i].second : + (*cluster_pairs)[i].first; std::vector candidates; T weight; diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h index 09612a5aeb60..a80176494e1b 100644 --- a/src/kvstore/kvstore_dist.h +++ b/src/kvstore/kvstore_dist.h @@ -507,16 +507,16 @@ class KVStoreDist : public KVStoreLocal { size_t size = recv_buf.shape().Size(); const int dtype = recv_buf.dtype(); const int num_bytes = mshadow::mshadow_sizeof(dtype); - PSKV& pskv = (gradient_compression_->get_type() == CompressionType::kNone) - ? EncodeDefaultKey(key, size, num_bytes) - : EncodeCompressedKey(key, size, false, num_bytes); + PSKV& pskv = (gradient_compression_->get_type() == CompressionType::kNone) ? 
+ EncodeDefaultKey(key, size, num_bytes) : + EncodeCompressedKey(key, size, false, num_bytes); char* data = static_cast(recv_buf.data().dptr_); // false means not to delete data when SArray is deleted auto vals = new ps::SArray(data, size * num_bytes, false); // issue pull - RequestType mode = (gradient_compression_->get_type() != CompressionType::kNone) - ? RequestType::kCompressedPushPull - : RequestType::kDefaultPushPull; + RequestType mode = (gradient_compression_->get_type() != CompressionType::kNone) ? + RequestType::kCompressedPushPull : + RequestType::kDefaultPushPull; const int cmd = GetCommandType(mode, dtype); CHECK_NOTNULL(ps_worker_)->ZPull(pskv.keys, vals, &pskv.lens, cmd, [vals, cb]() { delete vals; diff --git a/src/kvstore/p3store_dist.h b/src/kvstore/p3store_dist.h index 56912cd7abcf..5b5a13f2e346 100644 --- a/src/kvstore/p3store_dist.h +++ b/src/kvstore/p3store_dist.h @@ -88,7 +88,7 @@ class P3StoreDist : public KVStoreDist { char* data = static_cast(send_buf.data().dptr_); // do push. false means no delete ps::SArray vals(data, size, false); - int cmd = GetCommandType(RequestType::kDefaultPushPull, dtype); + int cmd = GetCommandType(RequestType::kDefaultPushPull, dtype); size_t off = 0; auto counter = new std::atomic(pskv.keys.size()); for (size_t idx = 0; idx < pskv.keys.size(); idx++) { diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index cfcdab2e60cf..cdbb764bc535 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -62,13 +62,13 @@ void NDArray::ReInit(const NDArrayStorageType stype, if (!sparseStorage && stype != kCSRStorage) LOG(FATAL) << "Unknown storage type " << stype; - const auto& aux_types = (pAux_types && pAux_types->size()) - ? *pAux_types - : std::vector(sparseStorage ? 1 : 2, mshadow::kInt64); + const auto& aux_types = (pAux_types && pAux_types->size()) ? + *pAux_types : + std::vector(sparseStorage ? 1 : 2, mshadow::kInt64); - const auto& aux_shapes = (pAux_shapes && pAux_shapes->size()) - ? *pAux_shapes - : ShapeVector(sparseStorage ? 1 : 2, TShape(mshadow::Shape1(0))); + const auto& aux_shapes = (pAux_shapes && pAux_shapes->size()) ? + *pAux_shapes : + ShapeVector(sparseStorage ? 
                                 1 : 2, TShape(mshadow::Shape1(0)));
 
   mxnet::TShape storage_shape;
   if (!pStorage_shapes || !pStorage_shapes->Size()) {
@@ -2435,9 +2435,7 @@ void NDArray::SyncCheckFormat(const bool full_check) const {
   } else {
 #if MXNET_USE_CUDA
     Engine::Get()->PushSync(
-        [&](RunContext rctx) {
-          common::CheckFormatWrapper<gpu>(rctx, *this, err_cpu, full_check);
-        },
+        [&](RunContext rctx) { common::CheckFormatWrapper<gpu>(rctx, *this, err_cpu, full_check); },
         this->ctx(),
         {this->var()},
         {},
diff --git a/src/nnvm/gradient.cc b/src/nnvm/gradient.cc
index 038d287a83d2..f0f48f625a40 100644
--- a/src/nnvm/gradient.cc
+++ b/src/nnvm/gradient.cc
@@ -88,8 +88,7 @@ Graph Gradient(Graph src) {
   const std::vector<NodeEntry>& ys_out_grad =
       src.GetAttr<std::vector<NodeEntry> >("grad_ys_out_grad");
   CHECK_EQ(ys.size(), ys_out_grad.size());
-  const std::vector<NodeEntry>& us =
-      src.GetAttr<std::vector<NodeEntry> >("grad_us");
+  const std::vector<NodeEntry>& us = src.GetAttr<std::vector<NodeEntry> >("grad_us");
 
   // initialize a topological order of the graph nodes and `output_grads`
   // that maps every operator node to its gradient entries
@@ -506,7 +505,6 @@ inline bool CheckGradAllZero(const std::vector<NodeEntry>& grads,
   return true;
 }
 
-
 Graph BuildGradientGraph(const Graph& src,
                          const std::vector<NodeEntry>& xs,
                          const std::vector<ObjectPtr>& topo_order,
@@ -546,9 +544,9 @@ Graph BuildGradientGraph(const Graph& src,
   if (src.attrs.count("zero_ops") != 0) {
     zero_ops = src.GetAttr<std::vector<const Op*> >("zero_ops");
   }
-  const Op* copy_op = (src.attrs.count("copy_op_str") != 0)
-                          ? Op::Get(src.GetAttr<std::string>("copy_op_str"))
-                          : nullptr;
+  const Op* copy_op = (src.attrs.count("copy_op_str") != 0) ?
+                          Op::Get(src.GetAttr<std::string>("copy_op_str")) :
+                          nullptr;
 
   std::vector out_agg_grads;
   for (auto topo_order_rit = topo_order.rbegin(); topo_order_rit != topo_order.rend();
diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc
index 73f494334854..3859497e466f 100644
--- a/src/nnvm/plan_memory.cc
+++ b/src/nnvm/plan_memory.cc
@@ -379,9 +379,9 @@ Graph MXPlanMemory(Graph ret) {
   size_t min_allocated_bytes = -1;
   size_t max_match_range     = dmlc::GetEnv("NNVM_EXEC_MATCH_RANGE", 16);
   size_t min_match_range =
-      dmlc::GetEnv("MXNET_MEMORY_OPT", 0) || dmlc::GetEnv("NNVM_AUTO_SEARCH_MATCH_RANGE", false)
-          ? 1
-          : max_match_range;
+      dmlc::GetEnv("MXNET_MEMORY_OPT", 0) || dmlc::GetEnv("NNVM_AUTO_SEARCH_MATCH_RANGE", false) ?
+      1 :
+      max_match_range;
   for (size_t match_range = min_match_range; match_range <= max_match_range; match_range *= 2) {
     // Make a copy of related fields
     StorageVector storage_vec(storage);
diff --git a/src/operator/contrib/adamw.cu b/src/operator/contrib/adamw.cu
index c3b83f412ae9..b67ea10e26a3 100644
--- a/src/operator/contrib/adamw.cu
+++ b/src/operator/contrib/adamw.cu
@@ -29,7 +29,7 @@ namespace op {
 namespace adamw {
 
 template <>
-void GetScaleFloat<gpu>(mshadow::Stream<gpu>* s, const TBlob& scale_blob, float* pScalef) {
+void GetScaleFloat<gpu>(mshadow::Stream<gpu>* s, const TBlob& scale_blob, float* pScalef) {
   MSHADOW_REAL_TYPE_SWITCH(
       scale_blob.type_flag_,
       DType,
diff --git a/src/operator/contrib/bilinear_resize-inl.h b/src/operator/contrib/bilinear_resize-inl.h
index acab01adf3d1..be57acc36ce1 100644
--- a/src/operator/contrib/bilinear_resize-inl.h
+++ b/src/operator/contrib/bilinear_resize-inl.h
@@ -132,8 +132,8 @@ static inline DType area_pixel_compute_scale(int64_t input_size,
    * src_idx + 0.5 = scale * (dst_index + 0.5)
    */
   if (output_size > 1) {
-    return align_corners ? static_cast<DType>(input_size - 1) / (output_size - 1)
-                         : static_cast<DType>(input_size) / output_size;
+    return align_corners ?
static_cast(input_size - 1) / (output_size - 1) : + static_cast(input_size) / output_size; } else { return DType(0); } @@ -270,12 +270,12 @@ static bool BilinearSampleOpInferShape(const nnvm::NodeAttrs& attrs, break; } case bilinear_resize::odd_scale: { - new_height = ((dshape[2] % 2) == 0) - ? (int16_t)(dshape[2] * param.scale_height.value()) - : (int16_t)((dshape[2] - 1) * param.scale_height.value()) + 1; - new_width = ((dshape[3] % 2) == 0) - ? (int16_t)(dshape[3] * param.scale_width.value()) - : (int16_t)((dshape[3] - 1) * param.scale_width.value()) + 1; + new_height = ((dshape[2] % 2) == 0) ? + (int16_t)(dshape[2] * param.scale_height.value()) : + (int16_t)((dshape[2] - 1) * param.scale_height.value()) + 1; + new_width = ((dshape[3] % 2) == 0) ? + (int16_t)(dshape[3] * param.scale_width.value()) : + (int16_t)((dshape[3] - 1) * param.scale_width.value()) + 1; break; } case bilinear_resize::like: { diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 192605316fb7..1fc00e1b1483 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -943,21 +943,21 @@ struct box_encode { out_masks[a_index + 2] = valid; out_masks[a_index + 3] = valid; out_targets[a_index + 0] = - valid > static_cast(0.5) - ? ((ref_x - a_x) / a_width - static_cast(means[0])) / static_cast(stds[0]) - : static_cast(0.0); - out_targets[a_index + 1] = valid > static_cast(0.5) - ? ((ref_y - a_y) / a_height - static_cast(means[1])) / - static_cast(stds[1]) - : static_cast(0.0); - out_targets[a_index + 2] = valid > static_cast(0.5) - ? (log(ref_width / a_width) - static_cast(means[2])) / - static_cast(stds[2]) - : static_cast(0.0); - out_targets[a_index + 3] = valid > static_cast(0.5) - ? (log(ref_height / a_height) - static_cast(means[3])) / - static_cast(stds[3]) - : static_cast(0.0); + valid > static_cast(0.5) ? + ((ref_x - a_x) / a_width - static_cast(means[0])) / static_cast(stds[0]) : + static_cast(0.0); + out_targets[a_index + 1] = valid > static_cast(0.5) ? + ((ref_y - a_y) / a_height - static_cast(means[1])) / + static_cast(stds[1]) : + static_cast(0.0); + out_targets[a_index + 2] = valid > static_cast(0.5) ? + (log(ref_width / a_width) - static_cast(means[2])) / + static_cast(stds[2]) : + static_cast(0.0); + out_targets[a_index + 3] = valid > static_cast(0.5) ? + (log(ref_height / a_height) - static_cast(means[3])) / + static_cast(stds[3]) : + static_cast(0.0); } }; diff --git a/src/operator/contrib/bounding_box.cu b/src/operator/contrib/bounding_box.cu index 95fedde22491..ef2b7be50a37 100644 --- a/src/operator/contrib/bounding_box.cu +++ b/src/operator/contrib/bounding_box.cu @@ -489,9 +489,9 @@ __launch_bounds__(NMS::THRESHOLD) __global__ #pragma unroll for (int i = 0; i < n_threads / warp_size; ++i) { uint32_t my_mask = my_next_mask; - my_next_mask = (((i + 1) < n_threads / warp_size) && (my_element_in_batch < topk)) - ? nms_results[(i + 1) * topk * num_batches + my_element] - : full_mask; + my_next_mask = (((i + 1) < n_threads / warp_size) && (my_element_in_batch < topk)) ? 
+ nms_results[(i + 1) * topk * num_batches + my_element] : + full_mask; if (my_warp == i && !__all_sync(full_mask, my_mask == full_mask)) { my_mask = my_mask | earlier_threads_mask; // Loop over warp_size - 1 because the last diff --git a/src/operator/contrib/deformable_psroi_pooling.cc b/src/operator/contrib/deformable_psroi_pooling.cc index 411802c031fa..ea878998dc19 100644 --- a/src/operator/contrib/deformable_psroi_pooling.cc +++ b/src/operator/contrib/deformable_psroi_pooling.cc @@ -94,17 +94,17 @@ inline void DeformablePSROIPoolForwardCPU(const index_t count, index_t part_w = floor(static_cast(pw) / pooled_width * part_size); index_t class_id = ctop / channels_each_class; DType trans_x = - no_trans - ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + + part_w] * + trans_std; DType trans_y = - no_trans ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * - part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + + part_w] * + trans_std; DType wstart = static_cast(pw) * bin_size_w + roi_start_w; wstart += trans_x * roi_width; @@ -246,17 +246,17 @@ inline void DeformablePSROIPoolBackwardAccCPU(const index_t count, index_t part_w = floor(static_cast(pw) / pooled_width * part_size); index_t class_id = ctop / channels_each_class; DType trans_x = - no_trans - ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + + part_w] * + trans_std; DType trans_y = - no_trans ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * - part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + + part_w] * + trans_std; DType wstart = static_cast(pw) * bin_size_w + roi_start_w; wstart += trans_x * roi_width; diff --git a/src/operator/contrib/deformable_psroi_pooling.cu b/src/operator/contrib/deformable_psroi_pooling.cu index b629fb90887c..82f53a03e0fd 100644 --- a/src/operator/contrib/deformable_psroi_pooling.cu +++ b/src/operator/contrib/deformable_psroi_pooling.cu @@ -94,17 +94,17 @@ __global__ void DeformablePSROIPoolForwardKernel(const index_t count, index_t part_w = floor(static_cast(pw) / pooled_width * part_size); index_t class_id = ctop / channels_each_class; DType trans_x = - no_trans - ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + + part_w] * + trans_std; DType trans_y = - no_trans ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * - part_size + - part_w] * - trans_std; + no_trans ? 
+ static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + + part_w] * + trans_std; DType wstart = static_cast(pw) * bin_size_w + roi_start_w; wstart += trans_x * roi_width; @@ -248,17 +248,17 @@ __global__ void DeformablePSROIPoolBackwardAccKernel(const index_t count, index_t part_w = floor(static_cast(pw) / pooled_width * part_size); index_t class_id = ctop / channels_each_class; DType trans_x = - no_trans - ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + + part_w] * + trans_std; DType trans_y = - no_trans ? static_cast(0) - : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * - part_size + - part_w] * - trans_std; + no_trans ? + static_cast(0) : + bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + + part_w] * + trans_std; DType wstart = static_cast(pw) * bin_size_w + roi_start_w; wstart += trans_x * roi_width; diff --git a/src/operator/contrib/intgemm/prepare_weight_op.cc b/src/operator/contrib/intgemm/prepare_weight_op.cc index 798fe7621711..a7c3583193f8 100644 --- a/src/operator/contrib/intgemm/prepare_weight_op.cc +++ b/src/operator/contrib/intgemm/prepare_weight_op.cc @@ -160,9 +160,9 @@ The internal representation depends on register length. So AVX512, AVX2, and SS [](const NodeAttrs& attrs) { const PrepareWeightParam& params = nnvm::get(attrs.parsed); - return params.already_quantized - ? std::vector{"weight"} - : std::vector{"weight", "maxabs"}; + return params.already_quantized ? + std::vector{"weight"} : + std::vector{"weight", "maxabs"}; }) .set_attr("FInferShape", PrepareWeightOpShape) .set_attr("FInferType", PrepareWeightOpType) diff --git a/src/operator/contrib/multi_lamb.cc b/src/operator/contrib/multi_lamb.cc index 9afb6503abfb..866567d6aa21 100644 --- a/src/operator/contrib/multi_lamb.cc +++ b/src/operator/contrib/multi_lamb.cc @@ -44,8 +44,8 @@ struct MultiLAMBKernelStep1 { using namespace mshadow_op; for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] - : MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); MPDType scaled_grad = static_cast(kernel_params.grads[index][i]) * rescale_grad; if (clip_gradient >= 0.0f) scaled_grad = mshadow_op::clip::Map(scaled_grad, static_cast(clip_gradient)); @@ -91,8 +91,8 @@ struct MultiLAMBKernelStep2 { const OpReqType req) { for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] - : MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? 
kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); float r1 = sqrt(sum_sq_weigths[index]); float r2 = sqrt(sum_sq_temp_g[index]); if (lower_bound >= 0) diff --git a/src/operator/contrib/multi_lamb.cu b/src/operator/contrib/multi_lamb.cu index 24525f8d8f2d..118ec6348ed7 100644 --- a/src/operator/contrib/multi_lamb.cu +++ b/src/operator/contrib/multi_lamb.cu @@ -72,9 +72,9 @@ __global__ void KernelStep1(const MultiLAMBKernelParam kernel_pa for (int ii = 0; ii < ILP_LAMB; ii++) { int load_pos = i + ii * blockDim.x; if (load_pos < stop_pos && load_pos < kernel_params.sizes[tensor_id]) { - r_weight[ii] = has_mixed_precision - ? kernel_params.weights32[tensor_id][load_pos] - : static_cast(kernel_params.weights[tensor_id][load_pos]); + r_weight[ii] = has_mixed_precision ? + kernel_params.weights32[tensor_id][load_pos] : + static_cast(kernel_params.weights[tensor_id][load_pos]); r_grad[ii] = static_cast(kernel_params.grads[tensor_id][load_pos]); r_mean[ii] = kernel_params.mean[tensor_id][load_pos]; r_var[ii] = kernel_params.var[tensor_id][load_pos]; @@ -145,9 +145,9 @@ __global__ void KernelStep2(const MultiLAMBKernelParam kernel_pa for (int ii = 0; ii < ILP_LAMB; ii++) { int load_pos = i + ii * blockDim.x; if (load_pos < stop_pos && load_pos < kernel_params.sizes[tensor_id]) { - r_weight[ii] = has_mixed_precision - ? kernel_params.weights32[tensor_id][load_pos] - : static_cast(kernel_params.weights[tensor_id][load_pos]); + r_weight[ii] = has_mixed_precision ? + kernel_params.weights32[tensor_id][load_pos] : + static_cast(kernel_params.weights[tensor_id][load_pos]); r_g[ii] = temp_g[kernel_params.tensor2temp_g[tensor_id] + load_pos]; } } diff --git a/src/operator/contrib/multi_lans.cc b/src/operator/contrib/multi_lans.cc index 154a4ce8fb4e..a7bb3ab69a77 100644 --- a/src/operator/contrib/multi_lans.cc +++ b/src/operator/contrib/multi_lans.cc @@ -45,8 +45,8 @@ struct MultiLANSKernelStep1 { using namespace mshadow_op; for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] - : MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); float g_norm = sqrt(g_sq_norm[index]); MPDType scaled_grad = static_cast(kernel_params.grads[index][i]) * rescale_grad; scaled_grad /= g_norm; @@ -95,8 +95,8 @@ struct MultiLANSKernelStep2 { const OpReqType req) { for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] - : MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); float r1 = sqrt(sum_sq_weigths[index]); float r2_m = sqrt(sum_sq_temp_m[index]); float r2_g = sqrt(sum_sq_temp_g[index]); diff --git a/src/operator/contrib/multi_lans.cu b/src/operator/contrib/multi_lans.cu index a57a99e25854..a9f59478cca1 100644 --- a/src/operator/contrib/multi_lans.cu +++ b/src/operator/contrib/multi_lans.cu @@ -72,9 +72,9 @@ __global__ void KernelStep1(const MultiLANSKernelParam kernel_pa for (int ii = 0; ii < ILP_LAMB; ii++) { int load_pos = i + ii * blockDim.x; if (load_pos < stop_pos && load_pos < kernel_params.sizes[tensor_id]) { - r_weight[ii] = has_mixed_precision - ? 
kernel_params.weights32[tensor_id][load_pos] - : static_cast(kernel_params.weights[tensor_id][load_pos]); + r_weight[ii] = has_mixed_precision ? + kernel_params.weights32[tensor_id][load_pos] : + static_cast(kernel_params.weights[tensor_id][load_pos]); r_grad[ii] = static_cast(kernel_params.grads[tensor_id][load_pos]); r_mean[ii] = kernel_params.mean[tensor_id][load_pos]; r_var[ii] = kernel_params.var[tensor_id][load_pos]; @@ -160,9 +160,9 @@ __global__ void KernelStep2(const MultiLANSKernelParam kernel_pa for (int ii = 0; ii < ILP_LAMB; ii++) { int load_pos = i + ii * blockDim.x; if (load_pos < stop_pos && load_pos < kernel_params.sizes[tensor_id]) { - r_weight[ii] = has_mixed_precision - ? kernel_params.weights32[tensor_id][load_pos] - : static_cast(kernel_params.weights[tensor_id][load_pos]); + r_weight[ii] = has_mixed_precision ? + kernel_params.weights32[tensor_id][load_pos] : + static_cast(kernel_params.weights[tensor_id][load_pos]); r_m[ii] = temp_m[kernel_params.tensor2temp_g[tensor_id] + load_pos]; r_g[ii] = temp_g[kernel_params.tensor2temp_g[tensor_id] + load_pos]; } diff --git a/src/operator/contrib/multi_lars-inl.h b/src/operator/contrib/multi_lars-inl.h index c5fd528c57f1..884e090f759e 100644 --- a/src/operator/contrib/multi_lars-inl.h +++ b/src/operator/contrib/multi_lars-inl.h @@ -68,10 +68,10 @@ struct MultiLARSKernel { bool is_lars_valid = w_norm > 0. && grads_sum_sq[i] > 0.; KERNEL_ASSIGN(out_data[i], req, - is_lars_valid - ? lrs[i] * eta * w_norm / - (sqrtf(grads_sum_sq[i]) * rescale_grad + wds[i] * w_norm + eps) - : lrs[i]); + is_lars_valid ? + lrs[i] * eta * w_norm / + (sqrtf(grads_sum_sq[i]) * rescale_grad + wds[i] * w_norm + eps) : + lrs[i]); } }; diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc index 8d52b1aae1ff..4c663206031e 100644 --- a/src/operator/control_flow.cc +++ b/src/operator/control_flow.cc @@ -726,9 +726,9 @@ static void WhileLoopGradComputeExCPU(const OpStatePtr& state_ptr, } if (i < (size_t)params.num_args - 2U) { // a var - igrads[i] = (step == 0) - ? outputs[i] - : NDArray(outputs[i].shape(), outputs[i].ctx(), true, outputs[i].dtype()); + igrads[i] = (step == 0) ? + outputs[i] : + NDArray(outputs[i].shape(), outputs[i].ctx(), true, outputs[i].dtype()); iter_req[i] = (step == 0 || req[i] == kNullOp) ? req[i] : kWriteTo; ++i; diff --git a/src/operator/correlation.cc b/src/operator/correlation.cc index 582dd28925a2..b57ce86b1a8c 100644 --- a/src/operator/correlation.cc +++ b/src/operator/correlation.cc @@ -135,18 +135,18 @@ inline void CorrelationBackward(const Tensor& out_grad, if ((y1 + h - pad_size_ >= 0) && (x1 + w - pad_size_ >= 0) && (y1 + h < height + pad_size_) && (x1 + w < width + pad_size_)) { Dtype sign = (tmp1[nbatch][y1 + h][x1 + w][channel] >= - tmp2[nbatch][y2 + h][x2 + w][channel]) - ? Dtype(1.0) - : Dtype(-1.0); + tmp2[nbatch][y2 + h][x2 + w][channel]) ? + Dtype(1.0) : + Dtype(-1.0); in_grad1[nbatch][channel][y1 + h - pad_size_][x1 + w - pad_size_] += out_grad[nbatch][top_channel][i][j] * sign / sumelems; } if ((y2 + h - pad_size_ >= 0) && (x2 + w - pad_size_ >= 0) && (y2 + h < height + pad_size_) && (x2 + w < width + pad_size_)) { Dtype sign = (tmp1[nbatch][y1 + h][x1 + w][channel] >= - tmp2[nbatch][y2 + h][x2 + w][channel]) - ? Dtype(-1.0) - : Dtype(1.0); + tmp2[nbatch][y2 + h][x2 + w][channel]) ? 
+ Dtype(-1.0) : + Dtype(1.0); in_grad2[nbatch][channel][y2 + h - pad_size_][x2 + w - pad_size_] += out_grad[nbatch][top_channel][i][j] * sign / sumelems; } diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc index dd331ade231c..ff2ce4aae2a4 100644 --- a/src/operator/leaky_relu.cc +++ b/src/operator/leaky_relu.cc @@ -181,17 +181,17 @@ The following modified ReLU Activation functions are supported: [](const NodeAttrs& attrs) { const LeakyReLUParam& param = nnvm::get(attrs.parsed); - return param.act_type == leakyrelu::kPReLU - ? std::vector{"data", "gamma"} - : std::vector{"data"}; + return param.act_type == leakyrelu::kPReLU ? + std::vector{"data", "gamma"} : + std::vector{"data"}; }) .set_attr("FListOutputNames", [](const NodeAttrs& attrs) { const LeakyReLUParam& param = nnvm::get(attrs.parsed); - return param.act_type == leakyrelu::kRReLU - ? std::vector{"output", "mask"} - : std::vector{"output"}; + return param.act_type == leakyrelu::kRReLU ? + std::vector{"output", "mask"} : + std::vector{"output"}; }) .set_attr("FInferShape", LeakyReLUShape) .set_attr("FInferType", LeakyReLUType) diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index 09e42481a66b..72f7b294b9f9 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -580,36 +580,34 @@ struct AccType { .add_enum("int64", mshadow::kInt64) \ .add_enum("bool", mshadow::kBool) -#define MXNET_ADD_ALL_TYPES_EXT \ - .add_enum("float32", mshadow::kFloat32) \ - .add_enum("float64", mshadow::kFloat64) \ - .add_enum("float16", mshadow::kFloat16) \ - .add_enum("bfloat16", mshadow::kBfloat16) \ - .add_enum("uint8", mshadow::kUint8) \ - .add_enum("int8", mshadow::kInt8) \ - .add_enum("int32", mshadow::kInt32) \ - .add_enum("int64", mshadow::kInt64) \ - .add_enum("int16", mshadow::kInt16) \ - .add_enum("uint16", mshadow::kUint16) \ - .add_enum("uint32", mshadow::kUint32) \ - .add_enum("uint64", mshadow::kUint64) - - -#define MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL \ - .add_enum("float32", mshadow::kFloat32) \ - .add_enum("float64", mshadow::kFloat64) \ - .add_enum("float16", mshadow::kFloat16) \ - .add_enum("bfloat16", mshadow::kBfloat16) \ - .add_enum("uint8", mshadow::kUint8) \ - .add_enum("int8", mshadow::kInt8) \ - .add_enum("int32", mshadow::kInt32) \ - .add_enum("int64", mshadow::kInt64) \ - .add_enum("bool", mshadow::kBool) \ - .add_enum("int16", mshadow::kInt16) \ - .add_enum("uint16", mshadow::kUint16) \ - .add_enum("uint32", mshadow::kUint32) \ - .add_enum("uint64", mshadow::kUint64) +#define MXNET_ADD_ALL_TYPES_EXT \ + .add_enum("float32", mshadow::kFloat32) \ + .add_enum("float64", mshadow::kFloat64) \ + .add_enum("float16", mshadow::kFloat16) \ + .add_enum("bfloat16", mshadow::kBfloat16) \ + .add_enum("uint8", mshadow::kUint8) \ + .add_enum("int8", mshadow::kInt8) \ + .add_enum("int32", mshadow::kInt32) \ + .add_enum("int64", mshadow::kInt64) \ + .add_enum("int16", mshadow::kInt16) \ + .add_enum("uint16", mshadow::kUint16) \ + .add_enum("uint32", mshadow::kUint32) \ + .add_enum("uint64", mshadow::kUint64) +#define MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL \ + .add_enum("float32", mshadow::kFloat32) \ + .add_enum("float64", mshadow::kFloat64) \ + .add_enum("float16", mshadow::kFloat16) \ + .add_enum("bfloat16", mshadow::kBfloat16) \ + .add_enum("uint8", mshadow::kUint8) \ + .add_enum("int8", mshadow::kInt8) \ + .add_enum("int32", mshadow::kInt32) \ + .add_enum("int64", mshadow::kInt64) \ + .add_enum("bool", mshadow::kBool) \ + .add_enum("int16", mshadow::kInt16) \ + .add_enum("uint16", mshadow::kUint16) \ + 
.add_enum("uint32", mshadow::kUint32) \ + .add_enum("uint64", mshadow::kUint64) /* \brief Compute flattened index given coordinates and shape. */ template diff --git a/src/operator/nn/batch_norm-inl.h b/src/operator/nn/batch_norm-inl.h index 8b5ff3c76f04..92eded093d9c 100644 --- a/src/operator/nn/batch_norm-inl.h +++ b/src/operator/nn/batch_norm-inl.h @@ -369,8 +369,8 @@ class BNTensor3 { inline BNTensor3(const TBlob& blob, const int indexOfChannel) : dptr_(blob.dptr()), indexOfChannel_(static_cast( - indexOfChannel < 0 ? (static_cast(blob.shape_.ndim()) + indexOfChannel) - : indexOfChannel)) { + indexOfChannel < 0 ? (static_cast(blob.shape_.ndim()) + indexOfChannel) : + indexOfChannel)) { CHECK_EQ(blob.type_flag_, mshadow::DataType::kFlag); shape_[OUTER] = 1; for (size_t i = 0; i < indexOfChannel_; ++i) { @@ -385,9 +385,9 @@ class BNTensor3 { inline BNTensor3(DType* p, const mxnet::TShape& shape, const int indexOfChannel) : dptr_(p), - indexOfChannel_(static_cast(indexOfChannel < 0 - ? (static_cast(shape.ndim()) + indexOfChannel) - : indexOfChannel)) { + indexOfChannel_(static_cast(indexOfChannel < 0 ? + (static_cast(shape.ndim()) + indexOfChannel) : + indexOfChannel)) { shape_[OUTER] = 1; for (size_t i = 0; i < indexOfChannel_; ++i) { shape_[OUTER] *= shape[i]; diff --git a/src/operator/nn/batch_norm.cu b/src/operator/nn/batch_norm.cu index 195423bd1419..29f3f61b6808 100644 --- a/src/operator/nn/batch_norm.cu +++ b/src/operator/nn/batch_norm.cu @@ -280,13 +280,13 @@ __launch_bounds__(inference_forward_threads) __global__ my_channel = my_channel % num_channels; AType current_input = static_cast(scratch.separate[j]); - AType invstd = small_num_channels ? saved_invstd[my_channel] - : variance_to_invstd(runningVar[my_channel], epsilon); + AType invstd = small_num_channels ? saved_invstd[my_channel] : + variance_to_invstd(runningVar[my_channel], epsilon); AType mean = small_num_channels ? saved_mean[my_channel] : runningMean[my_channel]; AType gamma = - small_num_channels - ? saved_weight[my_channel] - : ((weight != nullptr && (flags & FIX_GAMMA_FLAG) == 0) ? weight[my_channel] : 1); + small_num_channels ? + saved_weight[my_channel] : + ((weight != nullptr && (flags & FIX_GAMMA_FLAG) == 0) ? weight[my_channel] : 1); AType beta = small_num_channels ? saved_bias[my_channel] : ((bias != nullptr) ? bias[my_channel] : 0); current_input = gamma * (current_input - mean) * invstd + beta; @@ -346,11 +346,11 @@ __global__ void BatchNormalizationUpdateOutputKernel(DeviceTensor input, } // Write normalized and update the output - const AccReal gamma = ((flags & FIX_GAMMA_FLAG) == 0 && weight.numElements() > 0) - ? ScalarConvert::to(weight[plane]) - : ScalarConvert::to(1); - const AccReal beta = bias.numElements() > 0 ? ScalarConvert::to(bias[plane]) - : ScalarConvert::to(0); + const AccReal gamma = ((flags & FIX_GAMMA_FLAG) == 0 && weight.numElements() > 0) ? + ScalarConvert::to(weight[plane]) : + ScalarConvert::to(1); + const AccReal beta = bias.numElements() > 0 ? ScalarConvert::to(bias[plane]) : + ScalarConvert::to(0); for (int batch = 0, nbatch = input.OuterSize(); batch < nbatch; ++batch) { for (int x = threadIdx.x, nx = input.InnerSize(); x < nx; x += blockDim.x) { const DType inp = input.get_ref(batch, plane, x); @@ -648,9 +648,9 @@ static __global__ void BatchNormalizationBackwardKernel(const DeviceTensor input mean = ScalarConvert::to(tensors.saveMean[plane]); invstd = tensors.saveInvStd[plane]; - const AccReal weightVal = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) - ? 
ScalarConvert::to(tensors.weight[plane]) - : AccReal(1); + const AccReal weightVal = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) ? + ScalarConvert::to(tensors.weight[plane]) : + AccReal(1); const AccReal norm = AccReal(1) / N; // Compute two values across (batch, x/y/z) in one pass: @@ -951,9 +951,9 @@ static void BatchNormalizationBackward(mshadow::Stream* s, if (tensors.gradBias.numElements() <= 0) { flags_copy = (flags_copy & ~WRITE_BETA_FLAG); } - AccReal* gamma = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) - ? tensors.weight.dptr_ - : nullptr; + AccReal* gamma = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) ? + tensors.weight.dptr_ : + nullptr; if (param.axis == -1 || param.axis == in_data[batchnorm::kData].shape_.ndim() - 1) { const int C = gradOutput.ChannelCount(); diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 6206c8e809bf..f5a6f7f52ca9 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -255,8 +255,8 @@ bool SupportDNNLConcat(const std::vector& arrs) { // DO not support zero-size tensors. if (arr.shape().Size() == 0) return false; - int ndim = arr.shape().ndim(); - const int dnnl_ndims = arr.GetDNNLData()->get_desc().data.ndims; + int ndim = arr.shape().ndim(); + const int dnnl_ndims = arr.GetDNNLData()->get_desc().data.ndims; if ((ndim != 2 && ndim != 4) || ndim != dnnl_ndims) { return false; } diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index 0e054c0ff07f..787fbc0ef497 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -126,9 +126,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<3> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] != -1 - ? (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 - : -1; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : + -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. @@ -177,12 +177,12 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<4> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] != -1 - ? (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 - : -1; - oshape[3] = dshape[3] != -1 - ? (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 - : -1; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : + -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : + -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. @@ -239,15 +239,15 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<5> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] != -1 - ? (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 - : -1; - oshape[3] = dshape[3] != -1 - ? (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 - : -1; - oshape[4] = dshape[4] != -1 - ? 
(AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 - : -1; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : + -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : + -1; + oshape[4] = dshape[4] != -1 ? + (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : + -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cu b/src/operator/nn/cudnn/cudnn_batch_norm.cu index f9c387cebd20..ce3d1e1b1b9b 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.cu +++ b/src/operator/nn/cudnn/cudnn_batch_norm.cu @@ -60,18 +60,18 @@ void SetDescriptors(const BatchNormParam& param, const TBlob& x) { CHECK(param.axis == 1 || param.axis == x.shape_.ndim() - 1); cudnnTensorFormat_t format = param.axis == 1 ? CUDNN_TENSOR_NCHW : CUDNN_TENSOR_NHWC; - int n = x.shape_[0]; - int c = x.shape_[param.axis]; - size_t last_spatial_i = param.axis == 1 ? x.shape_.ndim() - 1 : x.shape_.ndim() - 2; - int w = x.shape_[last_spatial_i]; + int n = x.shape_[0]; + int c = x.shape_[param.axis]; + size_t last_spatial_i = param.axis == 1 ? x.shape_.ndim() - 1 : x.shape_.ndim() - 2; + int w = x.shape_[last_spatial_i]; int h = x.shape_.ProdShape(last_spatial_i - (x.shape_.ndim() - 3), last_spatial_i); MSHADOW_REAL_TYPE_SWITCH(x.type_flag_, DType, { - CUDNN_CALL(cudnnSetTensor4dDescriptor(Globals::Get().io_desc, format, - mshadow::DataType::kCudnnFlag, n, c, h, w)); + CUDNN_CALL(cudnnSetTensor4dDescriptor( + Globals::Get().io_desc, format, mshadow::DataType::kCudnnFlag, n, c, h, w)); }) - CUDNN_CALL(cudnnDeriveBNTensorDescriptor(Globals::Get().mean_desc, Globals::Get().io_desc, - CUDNN_BATCHNORM_SPATIAL)); + CUDNN_CALL(cudnnDeriveBNTensorDescriptor( + Globals::Get().mean_desc, Globals::Get().io_desc, CUDNN_BATCHNORM_SPATIAL)); } mshadow::TypeFlag ParamType(int x_type) { @@ -86,8 +86,10 @@ bool CudnnBatchNormSupports(const BatchNormParam& param, const TBlob& x) { return n >= 3 && (param.axis == 1 || param.axis == n - 1); } -void CudnnBatchNormForward(const BatchNormParam& param, const OpContext& ctx, - const std::vector& inputs, const std::vector& req, +void CudnnBatchNormForward(const BatchNormParam& param, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, const std::vector& outputs) { CHECK_EQ(inputs.size(), 5); if (ctx.is_train) { @@ -106,13 +108,20 @@ void CudnnBatchNormForward(const BatchNormParam& param, const OpContext& ctx, MSHADOW_REAL_TYPE_SWITCH(ParamType(inputs[batchnorm::kData].type_flag_), DType, { DType a = 1.0f; DType b = 0.0f; - if (param.fix_gamma) inputs[batchnorm::kGamma].FlatTo1D(s) = 1.0f; + if (param.fix_gamma) + inputs[batchnorm::kGamma].FlatTo1D(s) = 1.0f; if (ctx.is_train) { size_t workspace_size = 0; CUDNN_CALL(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( - s->dnn_handle_, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, CUDNN_BATCHNORM_OPS_BN, - Globals::Get().io_desc, nullptr, Globals::Get().io_desc, Globals::Get().mean_desc, - nullptr, &workspace_size)); + s->dnn_handle_, + CUDNN_BATCHNORM_SPATIAL_PERSISTENT, + CUDNN_BATCHNORM_OPS_BN, + Globals::Get().io_desc, + nullptr, + Globals::Get().io_desc, + Globals::Get().mean_desc, + nullptr, + &workspace_size)); auto workspace = 
ctx.requested[0].get_space_internal(workspace_size, "CudnnBatchNormForward"); // If the lock on the auxiliary states is set, then this implies that @@ -122,30 +131,50 @@ void CudnnBatchNormForward(const BatchNormParam& param, const OpContext& ctx, // the `momentum` to `1` (or `factor` to `0`). double factor = ((dmlc::GetEnv("MXNET_BACKWARD_DO_MIRROR", 0) || dmlc::GetEnv("MXNET_MEMORY_OPT", 0)) && - Globals::Get().internal_aux_states_lock) - ? 0 - : (1 - param.momentum); - CUDNN_CALL(cudnnBatchNormalizationForwardTrainingEx( - s->dnn_handle_, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, CUDNN_BATCHNORM_OPS_BN, &a, &b, - Globals::Get().io_desc, inputs[batchnorm::kData].dptr_, - nullptr, nullptr, // zDesc, zData - Globals::Get().io_desc, outputs[batchnorm::kOut].dptr_, - Globals::Get().mean_desc, - inputs[batchnorm::kGamma].dptr_, inputs[batchnorm::kBeta].dptr_, - factor, inputs[batchnorm::kInMovingMean].dptr_, inputs[batchnorm::kInMovingVar].dptr_, - param.eps, outputs[batchnorm::kMean].dptr_, outputs[batchnorm::kVar].dptr_, - nullptr, // activation desc - workspace, workspace_size, - nullptr, 0)); // reserveSpace, reserveSpaceSizeInBytes + Globals::Get().internal_aux_states_lock) ? + 0 : + (1 - param.momentum); + CUDNN_CALL( + cudnnBatchNormalizationForwardTrainingEx(s->dnn_handle_, + CUDNN_BATCHNORM_SPATIAL_PERSISTENT, + CUDNN_BATCHNORM_OPS_BN, + &a, + &b, + Globals::Get().io_desc, + inputs[batchnorm::kData].dptr_, + nullptr, + nullptr, // zDesc, zData + Globals::Get().io_desc, + outputs[batchnorm::kOut].dptr_, + Globals::Get().mean_desc, + inputs[batchnorm::kGamma].dptr_, + inputs[batchnorm::kBeta].dptr_, + factor, + inputs[batchnorm::kInMovingMean].dptr_, + inputs[batchnorm::kInMovingVar].dptr_, + param.eps, + outputs[batchnorm::kMean].dptr_, + outputs[batchnorm::kVar].dptr_, + nullptr, // activation desc + workspace, + workspace_size, + nullptr, + 0)); // reserveSpace, reserveSpaceSizeInBytes } else { - CUDNN_CALL(cudnnBatchNormalizationForwardInference( - s->dnn_handle_, CUDNN_BATCHNORM_SPATIAL, &a, &b, - Globals::Get().io_desc, inputs[batchnorm::kData].dptr_, - Globals::Get().io_desc, outputs[batchnorm::kOut].dptr_, - Globals::Get().mean_desc, - inputs[batchnorm::kGamma].dptr_, inputs[batchnorm::kBeta].dptr_, - inputs[batchnorm::kInMovingMean].dptr_, inputs[batchnorm::kInMovingVar].dptr_, - param.eps)); + CUDNN_CALL(cudnnBatchNormalizationForwardInference(s->dnn_handle_, + CUDNN_BATCHNORM_SPATIAL, + &a, + &b, + Globals::Get().io_desc, + inputs[batchnorm::kData].dptr_, + Globals::Get().io_desc, + outputs[batchnorm::kOut].dptr_, + Globals::Get().mean_desc, + inputs[batchnorm::kGamma].dptr_, + inputs[batchnorm::kBeta].dptr_, + inputs[batchnorm::kInMovingMean].dptr_, + inputs[batchnorm::kInMovingVar].dptr_, + param.eps)); } }) // Set the lock on the auxiliary states. 
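The `factor` handed to cudnnBatchNormalizationForwardTrainingEx above is the exponential-moving-average coefficient for the running statistics. A minimal sketch of the update it requests (the helper name is illustrative only, not part of this patch):

    // running <- (1 - factor) * running + factor * batch_stat, with
    // factor = 1 - momentum normally, and factor = 0 when the auxiliary-state
    // lock suppresses a duplicate update under mirroring / memory optimization.
    double UpdatedRunningStat(double running, double batch_stat, double momentum, bool locked) {
      const double factor = locked ? 0.0 : (1.0 - momentum);
      return (1.0 - factor) * running + factor * batch_stat;
    }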
@@ -154,23 +183,33 @@ void CudnnBatchNormForward(const BatchNormParam& param, const OpContext& ctx, Globals::Get().internal_aux_states_lock = true; } -void CudnnBatchNormBackward(const BatchNormParam& param, const OpContext& ctx, - const std::vector& inputs, const std::vector& req, +void CudnnBatchNormBackward(const BatchNormParam& param, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, const std::vector& outputs) { CHECK_EQ(inputs.size(), 8); CHECK_EQ(outputs.size(), 3); CHECK_EQ(req.size(), 3); SetDescriptors(param, inputs[3 + batchnorm::kData]); - auto s = ctx.get_stream(); + auto s = ctx.get_stream(); size_t workspace_size = 0; - CUDNN_CALL(cudnnGetBatchNormalizationBackwardExWorkspaceSize( - s->dnn_handle_, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, CUDNN_BATCHNORM_OPS_BN, - Globals::Get().io_desc, Globals::Get().io_desc, Globals::Get().io_desc, nullptr, - Globals::Get().io_desc, Globals::Get().mean_desc, nullptr, &workspace_size)); + CUDNN_CALL(cudnnGetBatchNormalizationBackwardExWorkspaceSize(s->dnn_handle_, + CUDNN_BATCHNORM_SPATIAL_PERSISTENT, + CUDNN_BATCHNORM_OPS_BN, + Globals::Get().io_desc, + Globals::Get().io_desc, + Globals::Get().io_desc, + nullptr, + Globals::Get().io_desc, + Globals::Get().mean_desc, + nullptr, + &workspace_size)); auto workspace = ctx.requested[0].get_space_internal(workspace_size, "CudnnBatchNormBackward"); MSHADOW_REAL_TYPE_SWITCH(ParamType(inputs[3 + batchnorm::kData].type_flag_), DType, { - if (param.fix_gamma) inputs[3 + batchnorm::kGamma].FlatTo1D(s) = 1.0f; + if (param.fix_gamma) + inputs[3 + batchnorm::kGamma].FlatTo1D(s) = 1.0f; bool grad_add_gamma_beta = req[batchnorm::kGamma] == kAddTo || req[batchnorm::kBeta] == kAddTo; if (grad_add_gamma_beta) { if (IsBNWriting(req[batchnorm::kGamma])) @@ -178,28 +217,43 @@ void CudnnBatchNormBackward(const BatchNormParam& param, const OpContext& ctx, if (IsBNWriting(req[batchnorm::kBeta])) outputs[batchnorm::kBeta].FlatTo1D(s) = 0.0f; } - DType a = 1.0f; - DType b = 0.0f; - DType b_add = 1.0f; + DType a = 1.0f; + DType b = 0.0f; + DType b_add = 1.0f; const bool global_stats = !ctx.is_train || param.use_global_stats; - CUDNN_CALL(cudnnBatchNormalizationBackwardEx( - s->dnn_handle_, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, CUDNN_BATCHNORM_OPS_BN, - &a, req[batchnorm::kData] == kAddTo ? &b_add : &b, - &a, grad_add_gamma_beta ? &b_add : &b, - Globals::Get().io_desc, inputs[3 + batchnorm::kData].dptr_, - nullptr, nullptr, // yDesc, yData - Globals::Get().io_desc, inputs[batchnorm::kOut].dptr_, - nullptr, nullptr, // dzDesc, dzData - Globals::Get().io_desc, outputs[batchnorm::kData].dptr_, - Globals::Get().mean_desc, - inputs[3 + batchnorm::kGamma].dptr_, inputs[3 + batchnorm::kBeta].dptr_, - outputs[batchnorm::kGamma].dptr_, outputs[batchnorm::kBeta].dptr_, param.eps, - global_stats ? nullptr : inputs[batchnorm::kMean].dptr_, - global_stats ? nullptr : inputs[batchnorm::kVar].dptr_, - nullptr, // activationDesc - workspace, workspace_size, - nullptr, 0)); // reserveSpace, reserveSpaceSizeInBytes - if (param.fix_gamma) outputs[batchnorm::kGamma].FlatTo1D(s) = 0.0f; + CUDNN_CALL( + cudnnBatchNormalizationBackwardEx(s->dnn_handle_, + CUDNN_BATCHNORM_SPATIAL_PERSISTENT, + CUDNN_BATCHNORM_OPS_BN, + &a, + req[batchnorm::kData] == kAddTo ? &b_add : &b, + &a, + grad_add_gamma_beta ? 
&b_add : &b, + Globals::Get().io_desc, + inputs[3 + batchnorm::kData].dptr_, + nullptr, + nullptr, // yDesc, yData + Globals::Get().io_desc, + inputs[batchnorm::kOut].dptr_, + nullptr, + nullptr, // dzDesc, dzData + Globals::Get().io_desc, + outputs[batchnorm::kData].dptr_, + Globals::Get().mean_desc, + inputs[3 + batchnorm::kGamma].dptr_, + inputs[3 + batchnorm::kBeta].dptr_, + outputs[batchnorm::kGamma].dptr_, + outputs[batchnorm::kBeta].dptr_, + param.eps, + global_stats ? nullptr : inputs[batchnorm::kMean].dptr_, + global_stats ? nullptr : inputs[batchnorm::kVar].dptr_, + nullptr, // activationDesc + workspace, + workspace_size, + nullptr, + 0)); // reserveSpace, reserveSpaceSizeInBytes + if (param.fix_gamma) + outputs[batchnorm::kGamma].FlatTo1D(s) = 0.0f; }) Globals::Get().internal_aux_states_lock = false; } diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.h b/src/operator/nn/cudnn/cudnn_batch_norm.h index 0f6bebce70b6..4a9905367763 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.h +++ b/src/operator/nn/cudnn/cudnn_batch_norm.h @@ -21,7 +21,7 @@ * \file cudnn_batch_norm.h * \brief * \author Junyuan Xie -*/ + */ #ifndef MXNET_OPERATOR_NN_CUDNN_CUDNN_BATCH_NORM_H_ #define MXNET_OPERATOR_NN_CUDNN_CUDNN_BATCH_NORM_H_ @@ -39,12 +39,16 @@ STATIC_ASSERT_CUDNN_VERSION_GE(7401); bool CudnnBatchNormSupports(const BatchNormParam& param, const TBlob& x); -void CudnnBatchNormForward(const BatchNormParam& param, const OpContext& ctx, - const std::vector& inputs, const std::vector& req, +void CudnnBatchNormForward(const BatchNormParam& param, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, const std::vector& outputs); -void CudnnBatchNormBackward(const BatchNormParam& param, const OpContext& ctx, - const std::vector& inputs, const std::vector& req, +void CudnnBatchNormBackward(const BatchNormParam& param, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, const std::vector& outputs); #endif // MXNET_USE_CUDNN == 1 diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h new file mode 100644 index 000000000000..f295f144efe3 --- /dev/null +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file cudnn_convolution-inl.h + * \brief + * \author Bing Xu + */ +#ifndef MXNET_OPERATOR_NN_CUDNN_CUDNN_CONVOLUTION_INL_H_ +#define MXNET_OPERATOR_NN_CUDNN_CUDNN_CONVOLUTION_INL_H_ + +#include +#include +#include +#include +#include +#include "../convolution-inl.h" +#include "./cudnn_algoreg-inl.h" +#include "../../../common/cuda/utils.h" + +namespace mxnet { +namespace op { +#if MXNET_USE_CUDNN == 1 + +/*! 
+ * \brief The Operator used to perform convolution using cuDNN kernels. + */ +template +class CuDNNConvolutionOp { + STATIC_ASSERT_CUDNN_VERSION_GE(7000); + + public: + CuDNNConvolutionOp() { + CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&bias_desc_)); + CUDNN_CALL(cudnnCreateFilterDescriptor(&filter_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&forward_conv_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&back_conv_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&back_conv_desc_w_)); + parallelize_backward_kernels_ = Context::GetGPUStreamsPerWorker() >= 2; + } + + void Init(const ConvolutionParam& param, + int forward_compute_type, + int backward_compute_type, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + const RunContext& rctx, + bool add_to_weight) { + using namespace mshadow; + this->param_ = param; + this->add_to_weight_ = add_to_weight; + InitBufferForParam(); + auto cudnn_forward_compute_type = convertToCuDNNDataType(forward_compute_type); + auto cudnn_backward_compute_type = convertToCuDNNDataType(backward_compute_type); + // convert MB to words + param_.workspace = (param_.workspace << 20) / sizeof(DType); + dtype_ = DataType::kCudnnFlag; + // TensorCore algos only allowed on fp16-I/O convolutions if permitted by the global policy. + cudnn_tensor_core_ = DataType::kFlag == kFloat16 && GetEnvAllowTensorCore(); + + auto effective_layout = param_.layout.value(); + switch (effective_layout) { + // 1D convolutions will be executed as 2D convolutions with a height of 1. + case mshadow::kNCW: + effective_layout = mshadow::kNCHW; + break; + case mshadow::kNWC: + effective_layout = mshadow::kNHWC; + break; + case mshadow::kCWN: + effective_layout = mshadow::kCHWN; + break; + default: + break; + } + + MSHADOW_LAYOUT_SWITCH(effective_layout, Layout, { format_ = LayoutType::kCudnnFlag; }); + // Double check to make sure this class supports the operation + if (!Supports(param, forward_compute_type, backward_compute_type, rctx.ctx.dev_id)) + LOG(FATAL) << "Convolution parameters not supported by cuDNN implementation."; + + InitDescriptors(in_shape, out_shape, cudnn_forward_compute_type, cudnn_backward_compute_type); + + if (!param_.cudnn_tune) { + param_.cudnn_tune = dmlc::GetEnv("MXNET_CUDNN_AUTOTUNE_DEFAULT", 1); + } + // In cuDNN_v6, dilated convolution descriptors are compatible with only a + // single convolution algorithm. Despite this, we go through the algorithm + // selection process, which will return the only algorithm supported. This + // approach keeps the treatment of convolution cases uniform and will + // naturally respond to more algorithms supporting dilated convolutions in + // future cuDNN releases. 
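+    // One more note before selection: the "convert MB to words" step earlier in
+    // Init() means param_.workspace is already in DType elements at this point.
+    // Worked example (illustrative numbers only): a 1024 MB user limit becomes
+    //   (1024 << 20) == 1073741824 bytes, then / sizeof(float) == 268435456 words,
+    // and the selection/size routines below convert back to bytes when talking
+    // to cuDNN.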
+ SelectAlgo(rctx, in_shape, out_shape, cudnn_forward_compute_type, cudnn_backward_compute_type); + GetTempSize(rctx); + } + + ~CuDNNConvolutionOp() { + CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(bias_desc_)); + CUDNN_CALL(cudnnDestroyFilterDescriptor(filter_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(forward_conv_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(back_conv_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(back_conv_desc_w_)); + } + + void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data) { + using namespace mshadow; + size_t expected = param_.no_bias ? 2 : 3; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1U); + Stream* s = ctx.get_stream(); + Tensor workspace = AllocateTempWorkspace(ctx, forward_workspace_byte_); + size_t workspace_size = TensorSizeBytes(workspace); + + // I/O's should have 2 more dims than the kernel dim + DType* data_ptr = GetNdPtr(in_data[conv::kData], param_.kernel.ndim() + 2, s); + DType* wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s); + DType* out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, s); + + typename DataType::ScaleType alpha = 1.0f; + typename DataType::ScaleType beta = 0.0f; + typename DataType::ScaleType beta_add = 1.0f; + CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_, + &alpha, + in_desc_, + data_ptr, + filter_desc_, + wmat_ptr, + forward_conv_desc_, + forward_algo_.AlgoNumber(), + workspace.dptr_, + workspace_size, + req[conv::kOut] == kAddTo ? &beta_add : &beta, + out_desc_, + out_ptr)); + + if (!param_.no_bias) { + Tensor bias = in_data[conv::kBias].get(s); + CUDNN_CALL(cudnnAddTensor( + s->dnn_handle_, &alpha, bias_desc_, bias.dptr_, &beta_add, out_desc_, out_ptr)); + } + } + + void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& req, + const std::vector& in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + size_t expected = param_.no_bias == 0 ? 3 : 2; + CHECK_EQ(out_grad.size(), 1U); + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(in_grad.size(), expected); + Stream* s = ctx.get_stream(); + // RAII object to handle syncing of the underlying auxiliary stream with the primary stream + SyncedGPUAuxStream s_dgrad = ctx.get_gpu_aux_stream(); + + // I/O's should have 2 more dims than the kernel dim + DType* grad_ptr = GetNdPtr(out_grad[conv::kOut], param_.kernel.ndim() + 2, s); + DType* wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s); + DType* gwmat_ptr = GetNdPtr(in_grad[conv::kWeight], param_.kernel.ndim() + 2, s); + DType* data_ptr = GetNdPtr(in_data[conv::kData], param_.kernel.ndim() + 2, s); + DType* gdata_ptr = GetNdPtr(in_grad[conv::kData], param_.kernel.ndim() + 2, s); + + size_t backward_workspace_byte = + parallelize_backward_kernels_ ? 
back_workspace_byte_dgrad_ + back_workspace_byte_wgrad_ :
+            std::max(back_workspace_byte_dgrad_, back_workspace_byte_wgrad_);
+    Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte);
+    size_t workspace_size           = TensorSizeBytes(workspace);
+    DType* workspace_dptr_wgrad     = workspace.dptr_;
+    DType* workspace_dptr_dgrad     = workspace.dptr_;
+    if (parallelize_backward_kernels_) {
+      CHECK_LE(back_workspace_byte_dgrad_ + back_workspace_byte_wgrad_, workspace_size);
+      // Large allocations at some point will be given their own page.  Pass this alignment on to
+      // the larger of the two separate dgrad/wgrad workspaces.  This probably doesn't matter, but
+      // corresponds more closely to the workspace alignments used during cudnnFind.
+      if (back_workspace_byte_dgrad_ > back_workspace_byte_wgrad_)
+        workspace_dptr_wgrad = workspace.dptr_ + back_workspace_byte_dgrad_ / sizeof(DType);
+      else
+        workspace_dptr_dgrad = workspace.dptr_ + back_workspace_byte_wgrad_ / sizeof(DType);
+    } else {
+      CHECK_LE(back_workspace_byte_dgrad_, workspace_size);
+      CHECK_LE(back_workspace_byte_wgrad_, workspace_size);
+    }
+    typename DataType<DType>::ScaleType alpha    = 1.0f;
+    typename DataType<DType>::ScaleType beta     = 0.0f;
+    typename DataType<DType>::ScaleType beta_add = 1.0f;
+    if (req[conv::kWeight] != kNullOp) {
+      CHECK_EQ(add_to_weight_, req[conv::kWeight] == kAddTo);
+      CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+                                                &alpha,
+                                                in_desc_,
+                                                data_ptr,
+                                                out_desc_,
+                                                grad_ptr,
+                                                back_conv_desc_w_,
+                                                back_algo_w_.AlgoNumber(),
+                                                workspace_dptr_wgrad,
+                                                back_workspace_byte_wgrad_,
+                                                req[conv::kWeight] == kAddTo ? &beta_add : &beta,
+                                                filter_desc_,
+                                                gwmat_ptr));
+    }
+    if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+      Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                              &alpha,
+                                              out_desc_,
+                                              grad_ptr,
+                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
+                                              bias_desc_,
+                                              gbias.dptr_));
+    }
+    if (req[conv::kData] != kNullOp) {
+      CUDNN_CALL(cudnnConvolutionBackwardData(s_dgrad.GetStream()->dnn_handle_,
+                                              &alpha,
+                                              filter_desc_,
+                                              wmat_ptr,
+                                              out_desc_,
+                                              grad_ptr,
+                                              back_conv_desc_,
+                                              back_algo_.AlgoNumber(),
+                                              workspace_dptr_dgrad,
+                                              back_workspace_byte_dgrad_,
+                                              req[conv::kData] == kAddTo ? &beta_add : &beta,
+                                              in_desc_,
+                                              gdata_ptr));
+    }
+  }
+
+  /*!
+   * \brief Returns whether the cuDNN library version supports the convolution
+   * operation described by `param`: cuDNN v5 and earlier does not support
+   * dilated convolutions; dilation is only enabled after v6.0.20.
+   */
+  static bool Supports(ConvolutionParam param,
+                       int forward_compute_type,
+                       int backward_compute_type,
+                       int dev_id) {
+    using namespace mshadow;
+
+    // NDHWC not supported, NHWC not supported in true fp16
+    auto layout_val = param.layout.value();
+    auto true_fp16  = DataType<DType>::kFlag == kFloat16 &&
+                     (forward_compute_type == kFloat16 || backward_compute_type == kFloat16);
+    if (layout_val == kNDHWC || layout_val == kNWC || (layout_val == kNHWC && true_fp16))
+      return false;
+
+    // Permits graceful fallback to pseudo-fp16 on heterogeneous systems
+    if (!SupportsFloat16Compute(dev_id) &&
+        (forward_compute_type == kFloat16 || backward_compute_type == kFloat16)) {
+      return false;
+    }
+
+    return true;
+  }
+
+ private:
+  /*!
+   * \brief Translate an mxnet datatype to the corresponding cudnnDataType_t.
+   */
+  cudnnDataType_t convertToCuDNNDataType(int dtype) {
+    cudnnDataType_t converted = CUDNN_DATA_FLOAT;
+    // The following will always assign to `converted` or throw an exception.
+ MSHADOW_REAL_TYPE_SWITCH( + dtype, mxDType, { converted = mshadow::DataType::kCudnnFlag; }) + return converted; + } + + void InitDescriptors(const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + cudnnDataType_t cudnn_forward_compute_type, + cudnnDataType_t cudnn_backward_compute_type) { + using namespace mshadow; + size_t expected = param_.no_bias ? 2 : 3; + CHECK_EQ(in_shape.size(), expected); + CHECK_EQ(out_shape.size(), 1U); + + mxnet::TShape dshape = in_shape[conv::kData]; + mxnet::TShape wshape = in_shape[conv::kWeight]; + mxnet::TShape oshape = out_shape[conv::kOut]; + mxnet::TShape dstride, ostride; + + if (param_.kernel.ndim() == 1 || param_.kernel.ndim() == 2) { + // 1d or 2d conv + auto pad = param_.kernel.ndim() == 2 ? param_.pad : mxnet::TShape({0, param_.pad[0]}); + auto stride = + param_.kernel.ndim() == 2 ? param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = + param_.kernel.ndim() == 2 ? param_.dilate : mxnet::TShape({1, param_.dilate[0]}); + CUDNN_CALL(cudnnSetConvolution2dDescriptor(forward_conv_desc_, + pad[0], + pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_forward_compute_type)); + CUDNN_CALL(cudnnSetConvolution2dDescriptor(back_conv_desc_, + pad[0], + pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + CUDNN_CALL(cudnnSetConvolution2dDescriptor(back_conv_desc_w_, + pad[0], + pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + if (param_.kernel.ndim() == 2) { + wshape = ConvertLayout(wshape.get<4>(), param_.layout.value(), kNCHW); + dstride = ConvertLayout(Strides<4>(dshape), param_.layout.value(), kNCHW); + dshape = ConvertLayout(dshape.get<4>(), param_.layout.value(), kNCHW); + ostride = ConvertLayout(Strides<4>(oshape), param_.layout.value(), kNCHW); + oshape = ConvertLayout(oshape.get<4>(), param_.layout.value(), kNCHW); + } else { + wshape = ConvertLayout(wshape.get<3>(), param_.layout.value(), kNCW); + wshape = mxnet::TShape({wshape[0], wshape[1], 1, wshape[2]}); + dstride = ConvertLayout(Strides<3>(dshape), param_.layout.value(), kNCW); + dstride = mxnet::TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); + dshape = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW); + dshape = mxnet::TShape({dshape[0], dshape[1], 1, dshape[2]}); + ostride = ConvertLayout(Strides<3>(oshape), param_.layout.value(), kNCW); + ostride = mxnet::TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); + oshape = ConvertLayout(oshape.get<3>(), param_.layout.value(), kNCW); + oshape = mxnet::TShape({oshape[0], oshape[1], 1, oshape[2]}); + } + CUDNN_CALL(cudnnSetFilter4dDescriptor( + filter_desc_, dtype_, format_, wshape[0], wshape[1], wshape[2], wshape[3])); +#if CUDNN_VERSION >= 7301 && CUDNN_VERSION < 7500 + auto kernel_h = wshape[2]; + auto kernel_w = wshape[3]; + auto stride_h = stride[0]; + auto stride_w = stride[1]; + auto pad_h = pad[0]; + auto pad_w = pad[1]; + if (param_.layout.value() == kNCHW && + (((stride_h == 2) && (kernel_h % 2 == 0) && (pad_h % 2 == 0)) || + ((stride_w == 2) && (kernel_w % 2 == 0) && (pad_w % 2 == 0)))) { + exclude_dgrad_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING; + } +#endif + } else if (param_.kernel.ndim() == 3) { + // 3d conv + CHECK_EQ(param_.layout.value(), kNCDHW) << "CuDNN only support 3D conv with NCDHW layout"; + std::vector wshape_buffer(wshape.ndim()); + 
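+      // CastTShapeToIntPtr (defined below in this file) fills wshape_buffer with
+      // a narrowed int copy of the dim_t shape, since the Nd descriptor setters
+      // take plain int arrays -- roughly (sketch only):
+      //   for (int i = 0; i < wshape.ndim(); ++i)
+      //     wshape_buffer[i] = static_cast<int>(wshape[i]);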
CUDNN_CALL(cudnnSetFilterNdDescriptor(filter_desc_, + dtype_, + CUDNN_TENSOR_NCHW, + static_cast(wshape.ndim()), + CastTShapeToIntPtr(wshape, &wshape_buffer))); + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(forward_conv_desc_, + 3, + param_pad_.data(), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_forward_compute_type)); + + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(back_conv_desc_, + 3, + param_pad_.data(), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(back_conv_desc_w_, + 3, + param_pad_.data(), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + + dstride = ConvertLayout(Strides<5>(dshape), param_.layout.value(), kNCDHW); + dshape = ConvertLayout(dshape.get<5>(), param_.layout.value(), kNCDHW); + ostride = ConvertLayout(Strides<5>(oshape), param_.layout.value(), kNCDHW); + oshape = ConvertLayout(oshape.get<5>(), param_.layout.value(), kNCDHW); + } + // Set "allow tensor core" flag in convolution descriptors, if available. + cudnnMathType_t math_type = cudnn_tensor_core_ ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH; +#if CUDNN_VERSION >= 7200 + if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() && + (DataType::kFlag != kFloat16)) + math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION; +#endif + CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type)); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type)); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(forward_conv_desc_, param_.num_group)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_, param_.num_group)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_w_, param_.num_group)); + + std::vector dshape_buffer(dshape.ndim()); + nnvm::ShapeTypeCast(dshape.begin(), dshape.end(), dshape_buffer.data()); + std::vector dstride_buffer(dstride.ndim()); + nnvm::ShapeTypeCast(dstride.begin(), dstride.end(), dstride_buffer.data()); + + CUDNN_CALL(cudnnSetTensorNdDescriptor(in_desc_, + dtype_, + static_cast(dshape.ndim()), + dshape_buffer.data(), + dstride_buffer.data())); + + std::vector oshape_buffer(oshape.ndim()); + nnvm::ShapeTypeCast(oshape.begin(), oshape.end(), oshape_buffer.data()); + std::vector ostride_buffer(ostride.ndim()); + nnvm::ShapeTypeCast(ostride.begin(), ostride.end(), ostride_buffer.data()); + CUDNN_CALL(cudnnSetTensorNdDescriptor(out_desc_, + dtype_, + static_cast(oshape.ndim()), + oshape_buffer.data(), + ostride_buffer.data())); + + if (!param_.no_bias) { + mxnet::TShape bias = in_shape[conv::kBias]; + int bias_dim = static_cast(bias[0]); + std::vector bias_shape = {1, bias_dim, 1, 1}; + std::vector bias_stride = {bias_dim, 1, bias_dim, bias_dim}; + if (param_.kernel.ndim() == 3) { + bias_shape.push_back(1); + bias_stride.push_back(bias_dim); + } + CUDNN_CALL(cudnnSetTensorNdDescriptor(bias_desc_, + dtype_, + static_cast(bias_shape.size()), + &bias_shape[0], + &bias_stride[0])); + } + } + + void CuDNNAlgoSetter(const RunContext& rctx, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + cudnnDataType_t cudnn_forward_compute_type, + cudnnDataType_t cudnn_backward_compute_type, + CuDNNAlgo* fwd, + CuDNNAlgo* bwd, + CuDNNAlgo* flt) { + // Not in algo registry, must determine via *Get*() or *Find*() + mshadow::Stream* s = rctx.get_stream(); + CHECK_EQ(s->dnn_handle_ownership_, 
mshadow::Stream::OwnHandle); + size_t workspace_byte = static_cast(param_.workspace * sizeof(DType)); + + // Since the function signature of *Get*_v7() matches that of *Find*(), + // we can unify the find-vs-get logic by using function pointers. + + // Forward Algorithm Find/Get() v7 + std::vector fwd_results(MaxForwardAlgos(s->dnn_handle_)); + int actual_fwd_algos = 0; + auto fwd_algo_discoverer = param_.cudnn_tune.value() == conv::kOff ? + cudnnGetConvolutionForwardAlgorithm_v7 : + cudnnFindConvolutionForwardAlgorithm; + CUDNN_CALL((*fwd_algo_discoverer)(s->dnn_handle_, + in_desc_, + filter_desc_, + forward_conv_desc_, + out_desc_, + fwd_results.size(), + &actual_fwd_algos, + fwd_results.data())); + fwd_results.resize(actual_fwd_algos); + AlgoFinalSelect( + fwd_results, "forward", workspace_byte, fwd); + + // Backprop-to-Filter Algorithm Find/Get() v7 + auto max_bwd_filt_algos = MaxBackwardFilterAlgos(s->dnn_handle_); + std::vector bwd_filt_results(max_bwd_filt_algos); + int actual_bwd_filter_algos = 0; + // In cudnn v7.1.4, find() returned wgrad algos that could fail for large c if we + // were summing into the output (i.e. beta != 0). Get() returned OK algos though. + auto bwd_filter_algo_discoverer = param_.cudnn_tune.value() == conv::kOff ? + cudnnGetConvolutionBackwardFilterAlgorithm_v7 : + cudnnFindConvolutionBackwardFilterAlgorithm; + CUDNN_CALL((*bwd_filter_algo_discoverer)(s->dnn_handle_, + in_desc_, + out_desc_, + back_conv_desc_w_, + filter_desc_, + bwd_filt_results.size(), + &actual_bwd_filter_algos, + bwd_filt_results.data())); + bwd_filt_results.resize(actual_bwd_filter_algos); + AlgoFinalSelect( + bwd_filt_results, "backprop-to-filter", workspace_byte, flt); + + // Backprop-to-Data Algorithm Find/Get() v7 + auto max_bwd_data_algos = MaxBackwardDataAlgos(s->dnn_handle_); + std::vector bwd_data_results(max_bwd_data_algos); + int actual_bwd_data_algos = 0; + auto bwd_data_algo_discoverer = param_.cudnn_tune.value() == conv::kOff ? + cudnnGetConvolutionBackwardDataAlgorithm_v7 : + cudnnFindConvolutionBackwardDataAlgorithm; + CUDNN_CALL((*bwd_data_algo_discoverer)(s->dnn_handle_, + filter_desc_, + out_desc_, + back_conv_desc_, + in_desc_, + bwd_data_results.size(), + &actual_bwd_data_algos, + bwd_data_results.data())); + bwd_data_results.resize(actual_bwd_data_algos); + AlgoFinalSelect( + bwd_data_results, "backprop-to-data", workspace_byte, bwd, exclude_dgrad_algo_); + + // Fix for issue #11241 + int cudnn_find_issue_max_features = 64 * 1024; + if (add_to_weight_ && Features(in_shape[conv::kData]) >= cudnn_find_issue_max_features) { + flt->Set(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true); + } + } + + void SelectAlgo(const RunContext& rctx, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + cudnnDataType_t cudnn_forward_compute_type, + cudnnDataType_t cudnn_backward_compute_type) { + auto algo_setter = [&](CuDNNAlgo* fwd, + CuDNNAlgo* bwd, + CuDNNAlgo* flt) { + if (param_.cudnn_tune.value() == conv::kOff) { + // The routine will only be calling cudnnGet, so no need to grab the Storage lock. + this->CuDNNAlgoSetter(rctx, + in_shape, + out_shape, + cudnn_forward_compute_type, + cudnn_backward_compute_type, + fwd, + bwd, + flt); + } else { + // One potential problem is that cudnnFind() uses cudaMalloc() to directly allocate + // I/O and workspace areas, and these allocations may result in an out-of-memory + // error even though the StorageMangager free pool is not empty. 
Ideally, cudnnFind + // would use MXNet's storage allocator for its I/O and workspace areas, instead of using + // the area carved out by MXNET_GPU_MEM_POOL_RESERVE. + // To get somewhat the same effect as this, we can pre-allocate the areas needed for the + // I/Os (possibly triggering a desirable StorageManager::ReleaseAll()), followed by a + // DirectFree(), which makes these areas available for cudnn's subsequent cudaMalloc(). + + // Allocate for x (or dx), w (or dw) and y (or dy). + ReserveElements({in_shape[conv::kData].Size(), + in_shape[conv::kWeight].Size(), + out_shape[conv::kOut].Size()}); + + // We're about to call cudnnFind so we need to quiet the system by grabbing + // the Storage lock. Concurrent cudaMalloc's can disrupt the accurate timing + // measurements of the algos, and can prevent the cuda driver's proper freeing + // of cudnnFind's internal temporary allocations. Grabbing the lock might also + // impede other threads from launching work on the GPU. + std::lock_guard lock(Storage::Get()->GetMutex(Context::kGPU)); + this->CuDNNAlgoSetter(rctx, + in_shape, + out_shape, + cudnn_forward_compute_type, + cudnn_backward_compute_type, + fwd, + bwd, + flt); + } + }; + + CuDNNConvAlgoReg::Get()->FindOrElseRegister(param_, + in_shape, + out_shape, + dtype_, + cudnn_forward_compute_type, + cudnn_backward_compute_type, + SMArch(rctx.ctx.dev_id), + add_to_weight_, + &forward_algo_, + &back_algo_, + &back_algo_w_, + algo_setter); + + // If we're allowing Tensor Core variants of the algos to be considered in + // *Find*() or *Get*(), but a non-Tensor-Core algo variant is the fastest, + // we must change the descriptor to preclude Tensor Core. Simplest is to + // once again set the mathType in all cases. + CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, forward_algo_.MathType())); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, back_algo_.MathType())); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, back_algo_w_.MathType())); + } + + // Look over the results from *Find*() or *Get*() and pick the fastest algo given possible + // workspace constraints. + template + void AlgoFinalSelect(const std::vector& perf_results, + std::string kernel_name, + size_t workspace_byte, + CuDNNAlgo* algo, + int32_t algo_exclude = -1) { + // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), + // regardless of mathType. + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); + for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { + const auto& result = perf_results[i]; + bool algo_exclusion = static_cast(result.algo) == algo_exclude; + bool algo_is_tensor_core = false; + algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; + if (result.status == CUDNN_STATUS_SUCCESS && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + (param_.cudnn_tune.value() == conv::kLimited || result.memory <= workspace_byte) && + !algo_exclusion) { + algo->Set(result.algo, algo_is_tensor_core); + return; + } + } + auto mode = param_.cudnn_tune.value() == conv::kOff ? " get " : " find "; + LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " convolution algorithm. 
" + << " with workspace size of " << workspace_byte << " bytes," + << " please consider reducing batch/model size or increasing the workspace size"; + } + + void GetTempSize(const RunContext& rctx) { + mshadow::Stream* s = rctx.get_stream(); + CUDNN_CALL(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_, + filter_desc_, + out_desc_, + back_conv_desc_, + in_desc_, + back_algo_.AlgoNumber(), + &back_workspace_byte_dgrad_)); + CUDNN_CALL(cudnnGetConvolutionBackwardFilterWorkspaceSize(s->dnn_handle_, + in_desc_, + out_desc_, + back_conv_desc_w_, + filter_desc_, + back_algo_w_.AlgoNumber(), + &back_workspace_byte_wgrad_)); + // cudaMalloc returns addresses that are aligned for large accesses (e.g. to 512 bytes). + // Since we only make one allocation and divide it into two parts when we parallelize + // the dgrad and wgrad kernels, we round the sizes up to this alignment size so the + // dptrs respect this alignment, even if the separate areas are stacked. + const size_t dptr_alignment = 512; + back_workspace_byte_dgrad_ = RoundToMultiple(back_workspace_byte_dgrad_, dptr_alignment); + back_workspace_byte_wgrad_ = RoundToMultiple(back_workspace_byte_wgrad_, dptr_alignment); + + CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_, + in_desc_, + filter_desc_, + forward_conv_desc_, + out_desc_, + forward_algo_.AlgoNumber(), + &forward_workspace_byte_)); + } + + int* CastTShapeToIntPtr(const mxnet::TShape& s, std::vector* buffer) { + buffer->resize(s.ndim()); + nnvm::ShapeTypeCast(s.begin(), s.end(), buffer->data()); + return buffer->data(); + } + + // Converts a TBlob to a dptr, checking for the expected dim and that it's contiguous. + DType* GetNdPtr(const TBlob& tb, int dim, Stream* s) { + DType* data_ptr = nullptr; + if (dim == 3) { + Tensor data = tb.get(s); + CHECK_EQ(data.CheckContiguous(), true); + data_ptr = data.dptr_; + } else if (dim == 4) { + Tensor data = tb.get(s); + CHECK_EQ(data.CheckContiguous(), true); + data_ptr = data.dptr_; + } else if (dim == 5) { + Tensor data = tb.get(s); + CHECK_EQ(data.CheckContiguous(), true); + data_ptr = data.dptr_; + } else { + LOG(FATAL) << "Unexpected Tensor size " << dim << ", supporting only 3, 4 or 5."; + } + return data_ptr; + } + + // Converts a mxnet::TShape to a Shape<> of strides. + // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1} + template + inline Shape Strides(const mxnet::TShape& s) { + int ndim = s.ndim(); + mxnet::TShape strides(ndim, -1); + for (int i = 0; i != ndim; ++i) + strides[i] = s.ProdShape(i + 1, ndim); + return strides.get(); + } + + void InitBufferForParam() { + CastTShapeToIntPtr(param_.stride, ¶m_stride_); + CastTShapeToIntPtr(param_.dilate, ¶m_dilate_); + CastTShapeToIntPtr(param_.pad, ¶m_pad_); + } + + // Round a value 'x' up to the next multiple of 'multiple' + size_t RoundToMultiple(size_t x, size_t multiple) { + size_t retVal = ((x + multiple - 1) / multiple) * multiple; + return retVal; + } + + // Allocates a 1D Tensor of words with size in bytes >= `size_bytes`. + // Always allocates at least one word. + mshadow::Tensor AllocateTempWorkspace(const OpContext& ctx, size_t size_bytes) { + mshadow::Stream* s = ctx.get_stream(); + size_t size_words = + std::max(1, RoundToMultiple(size_bytes, sizeof(DType)) / sizeof(DType)); + return ctx.requested[conv::kTempSpace].get_space_typed( + mshadow::Shape1(size_words), s); + } + + // Returns the size in bytes of the 1D Tensor of words. 
+  size_t TensorSizeBytes(const mshadow::Tensor<gpu, 1, DType>& tensor) {
+    return tensor.MSize() * sizeof(DType);
+  }
+
+  // Given a tensor shape of this operation, return the number of features 'c'
+  int64_t Features(const mxnet::TShape& dshape) {
+    int c = 0;
+    switch (dshape.ndim()) {
+      case 3:
+        c = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW)[1];
+        break;
+      case 4:
+        c = ConvertLayout(dshape.get<4>(), param_.layout.value(), kNCHW)[1];
+        break;
+      case 5:
+        c = ConvertLayout(dshape.get<5>(), param_.layout.value(), kNCDHW)[1];
+        break;
+      default:
+        LOG(FATAL) << "Unexpected convolution data dimension " << dshape.ndim();
+    }
+    return c;
+  }
+
+  // Make a number of allocations and directly free them, ensuring room for an equivalent set of
+  // cudaMalloc() calls by (say) cudnnFind().  `elements` specifies the alloc sizes in DTypes,
+  // not bytes.
+  void ReserveElements(const std::vector<size_t>& elements) {
+    std::vector<Storage::Handle> handles;
+    for (size_t alloc_element : elements) {
+      handles.push_back(Storage::Get()->Alloc(alloc_element * sizeof(DType), Context::GPU()));
+      handles.back().profiler_scope = ":";
+      handles.back().name           = "reserve_elements";
+    }
+    for (auto& handle : handles)
+      Storage::Get()->DirectFree(handle);
+  }
+
+  // Log that no suitable algo was found that met the workspace constraints, then exit.
+  void LogNoSuitableAlgoAndExit(int num_algos_tried,
+                                size_t min_memory_needs,
+                                size_t workspace_byte,
+                                std::string algo_kind) {
+    LOG(FATAL) << num_algos_tried << " " << algo_kind << " with minimum memory requirement "
+               << min_memory_needs << " bytes have been tried. Workspace size is set to "
+               << workspace_byte << " bytes, please consider reducing the batch/model size, "
+               << "or increasing workspace size.";
+  }
+
+  std::vector<int> param_stride_;
+  std::vector<int> param_dilate_;
+  std::vector<int> param_pad_;
+
+  // Temp workspace size in bytes needed for Forward() operation.
+  size_t forward_workspace_byte_;
+  // Temp workspace size in bytes needed for Backward() dgrad (data gradient) operation.
+  size_t back_workspace_byte_dgrad_;
+  // Temp workspace size in bytes needed for Backward() wgrad (weight gradient) operation.
+  size_t back_workspace_byte_wgrad_;
+  cudnnDataType_t dtype_;
+  cudnnTensorDescriptor_t in_desc_;
+  cudnnTensorDescriptor_t out_desc_;
+  cudnnTensorDescriptor_t bias_desc_;
+  cudnnFilterDescriptor_t filter_desc_;
+  // Convolution descriptor for forward inference operation
+  cudnnConvolutionDescriptor_t forward_conv_desc_;
+  // Convolution descriptor for back-prop operations to the data
+  cudnnConvolutionDescriptor_t back_conv_desc_;
+  // Convolution descriptor for back-prop operations to the weights
+  cudnnConvolutionDescriptor_t back_conv_desc_w_;
+  // Should dgrad and wgrad be launched into separate streams
+  bool parallelize_backward_kernels_;
+  // Algorithm for the forward inference operation
+  CuDNNAlgo<cudnnConvolutionFwdAlgo_t> forward_algo_;
+  // Algorithm for the back-prop operation to the data
+  CuDNNAlgo<cudnnConvolutionBwdDataAlgo_t> back_algo_;
+  // Algorithm for the back-prop operation to the weights
+  CuDNNAlgo<cudnnConvolutionBwdFilterAlgo_t> back_algo_w_;
+  cudnnTensorFormat_t format_;
+  // Allow TensorCore algo policy
+  bool cudnn_tensor_core_;
+  // Is req[kWeight] == conv::kAddTo ?
+  bool add_to_weight_;
+  // Is there a dgrad algo that should be avoided (-1 == none)?
+ int32_t exclude_dgrad_algo_ = -1; + ConvolutionParam param_; +}; +#endif // __CUDACC__ && CUDNN +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NN_CUDNN_CUDNN_CONVOLUTION_INL_H_ diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h new file mode 100644 index 000000000000..b6dddf318d26 --- /dev/null +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -0,0 +1,852 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file cudnn_deconvolution-inl.h + * \brief + * \author Wei Wu, Leonard Lausen + */ +#ifndef MXNET_OPERATOR_NN_CUDNN_CUDNN_DECONVOLUTION_INL_H_ +#define MXNET_OPERATOR_NN_CUDNN_CUDNN_DECONVOLUTION_INL_H_ + +#include +#include +#include +#include +#include +#include "../deconvolution-inl.h" +#include "./cudnn_algoreg-inl.h" +#include "../../../common/cuda/utils.h" + +namespace mxnet { +namespace op { +#if MXNET_USE_CUDNN == 1 + +template +class CuDNNDeconvolutionOp { + STATIC_ASSERT_CUDNN_VERSION_GE(7000); + + public: + CuDNNDeconvolutionOp() { + CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&bias_desc_)); + CUDNN_CALL(cudnnCreateFilterDescriptor(&filter_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&forward_conv_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&back_conv_desc_)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&back_conv_desc_w_)); + } + + void Init(DeconvolutionParam param, + int forward_compute_type, + int backward_compute_type, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + const RunContext& rctx, + bool add_to_weight) { + using namespace mshadow; + this->param_ = param; + this->add_to_weight_ = add_to_weight; + InitBufferForParam(); + auto cudnn_forward_compute_type = convertToCuDNNDataType(forward_compute_type); + auto cudnn_backward_compute_type = convertToCuDNNDataType(backward_compute_type); + // convert MB to words + param_.workspace = (param_.workspace << 20) / sizeof(DType); + dtype_ = mshadow::DataType::kCudnnFlag; + // TensorCore algos only allowed on fp16-I/O deconvolutions if permitted by the global policy. + cudnn_tensor_core_ = DataType::kFlag == kFloat16 && GetEnvAllowTensorCore(); + + auto effective_layout = param_.layout.value(); + switch (effective_layout) { + // 1D convolutions will be executed as 2D convolutions with a height of 1. 
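+      // e.g. an NCW input of logical shape (N, C, W) is handled as NCHW with
+      // shape (N, C, 1, W); the mapping applied below is
+      //   kNCW -> kNCHW,   kNWC -> kNHWC,   kCWN -> kCHWN.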
+ case mshadow::kNCW: + effective_layout = mshadow::kNCHW; + break; + case mshadow::kNWC: + effective_layout = mshadow::kNHWC; + break; + case mshadow::kCWN: + effective_layout = mshadow::kCHWN; + break; + default: + break; + } + + MSHADOW_LAYOUT_SWITCH(effective_layout, Layout, { format_ = LayoutType::kCudnnFlag; }); + // Double check to make sure this class supports the operation + if (!Supports(param, forward_compute_type, backward_compute_type, rctx.ctx.dev_id)) + LOG(FATAL) << "Deconvolution parameters not supported by cuDNN implementation."; + + InitDescriptors(in_shape, out_shape, cudnn_forward_compute_type, cudnn_backward_compute_type); + + if (!param_.cudnn_tune) { + param_.cudnn_tune = dmlc::GetEnv("MXNET_CUDNN_AUTOTUNE_DEFAULT", 1); + } + // In cuDNN_v6, dilated convolution descriptors are compatible with only a + // single convolution algorithm. Despite this, we go through the algorithm + // selection process, which will return the only algorithm supported. This + // approach keeps the treatment of convolution cases uniform and will + // naturally respond to more algorithms supporting dilated convolutions in + // future cuDNN releases. + SelectAlgo(rctx, in_shape, out_shape, cudnn_forward_compute_type, cudnn_backward_compute_type); + } + + ~CuDNNDeconvolutionOp() { + CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(bias_desc_)); + CUDNN_CALL(cudnnDestroyFilterDescriptor(filter_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(forward_conv_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(back_conv_desc_)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(back_conv_desc_w_)); + } + + void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data) { + using namespace mshadow; + size_t expected = param_.no_bias ? 2 : 3; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1U); + Stream* s = ctx.get_stream(); + GetTempSize(ctx); + Tensor workspace = AllocateTempWorkspace(ctx, forward_workspace_byte_); + size_t workspace_size = TensorSizeBytes(workspace); + + // I/O's should have 2 more dims than the kernel dim + DType* data_ptr = GetNdPtr(in_data[deconv::kData], param_.kernel.ndim() + 2, s); + DType* wmat_ptr = GetNdPtr(in_data[deconv::kWeight], param_.kernel.ndim() + 2, s); + DType* out_ptr = GetNdPtr(out_data[deconv::kOut], param_.kernel.ndim() + 2, s); + + for (uint32_t g = 0; g < param_.num_group; ++g) { + typename DataType::ScaleType alpha = 1.0f; + typename DataType::ScaleType beta = 0.0f; + CUDNN_CALL(cudnnConvolutionBackwardData( + s->dnn_handle_, + &alpha, + filter_desc_, + wmat_ptr + weight_offset_ * g, + in_desc_, + data_ptr + data_offset_ * g, + forward_conv_desc_, // this backward algorithm used for inference + back_algo_.AlgoNumber(), + workspace.dptr_, + workspace_size, + &beta, + out_desc_, + out_ptr + out_offset_ * g)); + if (!param_.no_bias) { + beta = 1.0f; + Tensor bias = in_data[deconv::kBias].get(s); + CUDNN_CALL(cudnnAddTensor(s->dnn_handle_, + &alpha, + bias_desc_, + bias.dptr_ + bias_offset_ * g, + &beta, + out_desc_, + out_ptr + out_offset_ * g)); + } + } + } + + void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& req, + const std::vector& in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + size_t expected = param_.no_bias == 0 ? 
3 : 2;
+    CHECK_EQ(out_grad.size(), 1U);
+    CHECK_EQ(in_data.size(), param_.no_bias ? 2U : 3U);
+    CHECK_EQ(in_grad.size(), expected);
+    Stream<gpu>* s = ctx.get_stream<gpu>();
+
+    // I/O's should have 2 more dims than the kernel dim
+    DType* grad_ptr  = GetNdPtr(out_grad[deconv::kOut], param_.kernel.ndim() + 2, s);
+    DType* wmat_ptr  = GetNdPtr(in_data[deconv::kWeight], param_.kernel.ndim() + 2, s);
+    DType* gwmat_ptr = GetNdPtr(in_grad[deconv::kWeight], param_.kernel.ndim() + 2, s);
+    DType* data_ptr  = GetNdPtr(in_data[deconv::kData], param_.kernel.ndim() + 2, s);
+    DType* gdata_ptr = GetNdPtr(in_grad[deconv::kData], param_.kernel.ndim() + 2, s);
+
+    CHECK_NE(req[deconv::kWeight], kWriteInplace);
+    if (!param_.no_bias) {
+      CHECK_NE(req[deconv::kBias], kWriteInplace);
+    }
+    CHECK_NE(req[deconv::kData], kWriteInplace);
+    GetTempSize(ctx);
+    Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_);
+    size_t workspace_size           = TensorSizeBytes(workspace);
+    for (uint32_t g = 0; g < param_.num_group; ++g) {
+      typename DataType<DType>::ScaleType alpha     = 1.0f;
+      typename DataType<DType>::ScaleType bias_beta = 0.0f;
+      if (!param_.no_bias && req[deconv::kBias] == kAddTo) {
+        bias_beta = 1.0f;
+      }
+      typename DataType<DType>::ScaleType data_beta = req[deconv::kData] == kAddTo ? 1.0f : 0.0f;
+      typename DataType<DType>::ScaleType weight_beta =
+          req[deconv::kWeight] == kAddTo ? 1.0f : 0.0f;
+      if (req[deconv::kWeight] != kNullOp) {
+        CHECK_EQ(add_to_weight_, req[deconv::kWeight] == kAddTo);
+        CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+                                                  &alpha,
+                                                  out_desc_,
+                                                  grad_ptr + out_offset_ * g,
+                                                  in_desc_,
+                                                  data_ptr + data_offset_ * g,
+                                                  back_conv_desc_,
+                                                  back_algo_w_.AlgoNumber(),
+                                                  workspace.dptr_,
+                                                  workspace_size,
+                                                  &weight_beta,
+                                                  filter_desc_,
+                                                  gwmat_ptr + weight_offset_ * g));
+      }
+      if (!param_.no_bias && (req[deconv::kBias] != kNullOp)) {
+        Tensor<gpu, 1, DType> gbias = in_grad[deconv::kBias].get<gpu, 1, DType>(s);
+        CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                                &alpha,
+                                                out_desc_,
+                                                grad_ptr + out_offset_ * g,
+                                                &bias_beta,
+                                                bias_desc_,
+                                                gbias.dptr_ + bias_offset_ * g));
+      }
+      if (req[deconv::kData] != kNullOp) {
+        CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+                                           &alpha,
+                                           out_desc_,
+                                           grad_ptr + out_offset_ * g,
+                                           filter_desc_,
+                                           wmat_ptr + weight_offset_ * g,
+                                           back_conv_desc_,
+                                           forward_algo_.AlgoNumber(),
+                                           workspace.dptr_,
+                                           workspace_size,
+                                           &data_beta,
+                                           in_desc_,
+                                           gdata_ptr + data_offset_ * g));
+      }
+    }
+  }
+
+  /*!
+   * \brief Returns whether the cuDNN library version supports the deconvolution
+   * operation described by `param`: cuDNN v5 and earlier does not support
+   * dilated convolutions.
+   */
+  static bool Supports(DeconvolutionParam param,
+                       int forward_compute_type,
+                       int backward_compute_type,
+                       int dev_id) {
+    using namespace mshadow;
+
+    // NDHWC not supported, NHWC not supported in true fp16
+    auto layout_val = param.layout.value();
+    auto true_fp16  = DataType<DType>::kFlag == kFloat16 &&
+                     (forward_compute_type == kFloat16 || backward_compute_type == kFloat16);
+    if (layout_val == kNDHWC || layout_val == kNWC || (layout_val == kNHWC && true_fp16))
+      return false;
+
+    // Permits graceful fallback to pseudo-fp16 on heterogeneous systems
+    if (!SupportsFloat16Compute(dev_id) &&
+        (forward_compute_type == kFloat16 || backward_compute_type == kFloat16)) {
+      return false;
+    }
+
+    return true;
+  }
+
+ private:
+  /*!
+   * \brief Translate an mxnet datatype to the corresponding cudnnDataType_t.
+ */ + cudnnDataType_t convertToCuDNNDataType(int dtype) { + cudnnDataType_t converted = CUDNN_DATA_FLOAT; + // The following will always assign to `converted` or throw an exception. + MSHADOW_REAL_TYPE_SWITCH( + dtype, mxDType, { converted = mshadow::DataType::kCudnnFlag; }) + return converted; + } + + inline void InitDescriptors(const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + cudnnDataType_t cudnn_forward_compute_type, + cudnnDataType_t cudnn_backward_compute_type) { + using namespace mshadow; + size_t expected = param_.no_bias ? 2 : 3; + CHECK_EQ(in_shape.size(), expected); + CHECK_EQ(out_shape.size(), 1U); + + mxnet::TShape dshape = in_shape[deconv::kData]; + mxnet::TShape wshape = in_shape[deconv::kWeight]; + mxnet::TShape oshape = out_shape[deconv::kOut]; + mxnet::TShape dstride, ostride; + wshape[0] /= param_.num_group; + if (param_.kernel.ndim() == 1 || param_.kernel.ndim() == 2) { + // 1d or 2d conv + index_t o_pad[2]; + index_t o_adj[2]; + if (param_.kernel.ndim() == 2) { + param_.InferPad(dshape, o_pad, o_adj); + } else { + index_t o_pad_1D[1]; + index_t o_adj_1D[1]; + param_.InferPad(dshape, o_pad_1D, o_adj_1D); + o_pad[0] = 0; + o_pad[1] = o_pad_1D[0]; + } + auto stride = + param_.kernel.ndim() == 2 ? param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = + param_.kernel.ndim() == 2 ? param_.dilate : mxnet::TShape({1, param_.dilate[0]}); + + CUDNN_CALL(cudnnSetConvolution2dDescriptor(forward_conv_desc_, + o_pad[0], + o_pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_forward_compute_type)); + CUDNN_CALL(cudnnSetConvolution2dDescriptor(back_conv_desc_, + o_pad[0], + o_pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + CUDNN_CALL(cudnnSetConvolution2dDescriptor(back_conv_desc_w_, + o_pad[0], + o_pad[1], + stride[0], + stride[1], + dilate[0], + dilate[1], + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + if (param_.kernel.ndim() == 2) { + wshape = ConvertLayout(wshape.get<4>(), param_.layout.value(), kNCHW); + dstride = ConvertLayout(Strides<4>(dshape), param_.layout.value(), kNCHW); + dshape = ConvertLayout(dshape.get<4>(), param_.layout.value(), kNCHW); + ostride = ConvertLayout(Strides<4>(oshape), param_.layout.value(), kNCHW); + oshape = ConvertLayout(oshape.get<4>(), param_.layout.value(), kNCHW); + } else { + wshape = ConvertLayout(wshape.get<3>(), param_.layout.value(), kNCW); + wshape = mxnet::TShape({wshape[0], wshape[1], 1, wshape[2]}); + dstride = ConvertLayout(Strides<3>(dshape), param_.layout.value(), kNCW); + dstride = mxnet::TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); + dshape = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW); + dshape = mxnet::TShape({dshape[0], dshape[1], 1, dshape[2]}); + ostride = ConvertLayout(Strides<3>(oshape), param_.layout.value(), kNCW); + ostride = mxnet::TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); + oshape = ConvertLayout(oshape.get<3>(), param_.layout.value(), kNCW); + oshape = mxnet::TShape({oshape[0], oshape[1], 1, oshape[2]}); + } + CUDNN_CALL(cudnnSetFilter4dDescriptor( + filter_desc_, dtype_, format_, wshape[0], wshape[1], wshape[2], wshape[3])); +#if CUDNN_VERSION >= 7301 && CUDNN_VERSION < 7500 + auto kernel_h = wshape[2]; + auto kernel_w = wshape[3]; + auto stride_h = stride[0]; + auto stride_w = stride[1]; + auto pad_h = o_pad[0]; + auto pad_w = o_pad[1]; + if (param_.layout.value() == kNCHW && + (((stride_h == 2) && 
(kernel_h % 2 == 0) && (pad_h % 2 == 0)) || + ((stride_w == 2) && (kernel_w % 2 == 0) && (pad_w % 2 == 0)))) { + exclude_dgrad_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING; + } +#endif + } else if (param_.kernel.ndim() == 3) { + // 3d conv + index_t o_pad[3]; + index_t o_adj[3]; + param_.InferPad(dshape, o_pad, o_adj); + + CHECK_EQ(param_.layout.value(), kNCDHW) << "CuDNN only support 3D conv with NCDHW layout"; + std::vector wshape_buffer(wshape.ndim()); + CUDNN_CALL(cudnnSetFilterNdDescriptor(filter_desc_, + dtype_, + CUDNN_TENSOR_NCHW, + static_cast(wshape.ndim()), + CastTShapeToIntPtr(wshape, &wshape_buffer))); + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(forward_conv_desc_, + 3, + reinterpret_cast(&o_pad[0]), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_forward_compute_type)); + + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(back_conv_desc_, + 3, + reinterpret_cast(&o_pad[0]), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + + CUDNN_CALL(cudnnSetConvolutionNdDescriptor(back_conv_desc_w_, + 3, + reinterpret_cast(&o_pad[0]), + param_stride_.data(), + param_dilate_.data(), + CUDNN_CROSS_CORRELATION, + cudnn_backward_compute_type)); + + dstride = ConvertLayout(Strides<5>(dshape), param_.layout.value(), kNCDHW); + dshape = ConvertLayout(dshape.get<5>(), param_.layout.value(), kNCDHW); + ostride = ConvertLayout(Strides<5>(oshape), param_.layout.value(), kNCDHW); + oshape = ConvertLayout(oshape.get<5>(), param_.layout.value(), kNCDHW); + } + // Set "allow tensor core" flag in convolution descriptors, if available. + cudnnMathType_t math_type = cudnn_tensor_core_ ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH; + CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type)); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type)); + CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type)); + dshape[1] /= param_.num_group; + oshape[1] /= param_.num_group; + weight_offset_ = wshape.Size(); + data_offset_ = dstride[1] * dshape[1]; + out_offset_ = ostride[1] * oshape[1]; + + std::vector dshape_buffer(dshape.ndim()); + std::vector dstride_buffer(dstride.ndim()); + CUDNN_CALL(cudnnSetTensorNdDescriptor(in_desc_, + dtype_, + static_cast(dshape.ndim()), + CastTShapeToIntPtr(dshape, &dshape_buffer), + CastTShapeToIntPtr(dstride, &dstride_buffer))) + + std::vector oshape_buffer(oshape.ndim()); + std::vector ostride_buffer(ostride.ndim()); + CUDNN_CALL(cudnnSetTensorNdDescriptor(out_desc_, + dtype_, + static_cast(oshape.ndim()), + CastTShapeToIntPtr(oshape, &oshape_buffer), + CastTShapeToIntPtr(ostride, &ostride_buffer))); + + if (!param_.no_bias) { + mxnet::TShape bias = in_shape[deconv::kBias]; + bias_offset_ = bias[0] / param_.num_group; + int bias_dim = static_cast(bias_offset_); + std::vector bias_shape = {1, bias_dim, 1, 1}; + std::vector bias_stride = {bias_dim, 1, bias_dim, bias_dim}; + if (param_.kernel.ndim() == 3) { + bias_shape.push_back(1); + bias_stride.push_back(bias_dim); + } + CUDNN_CALL(cudnnSetTensorNdDescriptor(bias_desc_, + dtype_, + static_cast(bias_shape.size()), + &bias_shape[0], + &bias_stride[0])); + } + } + + void CuDNNAlgoSetter(const RunContext& rctx, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, + cudnnDataType_t cudnn_forward_compute_type, + cudnnDataType_t cudnn_backward_compute_type, + CuDNNAlgo* fwd, + CuDNNAlgo* bwd, + CuDNNAlgo* flt) { + // Not in algo registry, must determine via *Get*() or *Find*() + 
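+    // The find-vs-get unification used throughout this routine relies on the
+    // matching signatures of the two cuDNN entry points; schematically:
+    //   auto discover = tuning_off ? cudnnGetConvolutionForwardAlgorithm_v7
+    //                              : cudnnFindConvolutionForwardAlgorithm;
+    //   discover(handle, descs..., max_results, &num_returned, results_array);
+    // (Schematic only; the real calls below spell out each descriptor argument.)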
+  void CuDNNAlgoSetter(const RunContext& rctx,
+                       const mxnet::ShapeVector& in_shape,
+                       const mxnet::ShapeVector& out_shape,
+                       cudnnDataType_t cudnn_forward_compute_type,
+                       cudnnDataType_t cudnn_backward_compute_type,
+                       CuDNNAlgo<cudnnConvolutionFwdAlgoPerf_t>* fwd,
+                       CuDNNAlgo<cudnnConvolutionBwdDataAlgoPerf_t>* bwd,
+                       CuDNNAlgo<cudnnConvolutionBwdFilterAlgoPerf_t>* flt) {
+    // Not in algo registry, must determine via *Get*() or *Find*()
+    mshadow::Stream<gpu>* s = rctx.get_stream<gpu>();
+    CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
+    size_t workspace_byte = static_cast<size_t>(param_.workspace * sizeof(DType));
+
+    // Since the function signature of *Get*_v7() matches that of *Find*(),
+    // we can unify the find-vs-get logic by using function pointers.
+
+    // Forward Algorithm Find/Get() v7
+    std::vector<cudnnConvolutionFwdAlgoPerf_t> fwd_results(MaxForwardAlgos(s->dnn_handle_));
+    int actual_fwd_algos = 0;
+    auto fwd_algo_discoverer = param_.cudnn_tune.value() == deconv::kOff ?
+                                   cudnnGetConvolutionForwardAlgorithm_v7 :
+                                   cudnnFindConvolutionForwardAlgorithm;
+    CUDNN_CALL((*fwd_algo_discoverer)(s->dnn_handle_,
+                                      out_desc_,
+                                      filter_desc_,
+                                      back_conv_desc_,  // fwd algo used to backprop-to-data
+                                      in_desc_,
+                                      fwd_results.size(),
+                                      &actual_fwd_algos,
+                                      fwd_results.data()));
+    fwd_results.resize(actual_fwd_algos);
+    AlgoFinalSelect<cudnnConvolutionFwdAlgoPerf_t>(
+        fwd_results, "forward", workspace_byte, fwd);
+
+    // Backprop-to-Filter Algorithm Find/Get() v7
+    auto max_bwd_filt_algos = MaxBackwardFilterAlgos(s->dnn_handle_);
+    std::vector<cudnnConvolutionBwdFilterAlgoPerf_t> bwd_filt_results(max_bwd_filt_algos);
+    int actual_bwd_filter_algos = 0;
+    // In cudnn v7.1.4, find() returned wgrad algos that could fail for large c if we
+    // were summing into the output (i.e. beta != 0). Get() returned OK algos though.
+    auto bwd_filter_algo_discoverer = param_.cudnn_tune.value() == deconv::kOff ?
+                                          cudnnGetConvolutionBackwardFilterAlgorithm_v7 :
+                                          cudnnFindConvolutionBackwardFilterAlgorithm;
+    CUDNN_CALL((*bwd_filter_algo_discoverer)(s->dnn_handle_,
+                                             out_desc_,
+                                             in_desc_,
+                                             back_conv_desc_,
+                                             filter_desc_,
+                                             bwd_filt_results.size(),
+                                             &actual_bwd_filter_algos,
+                                             bwd_filt_results.data()));
+    bwd_filt_results.resize(actual_bwd_filter_algos);
+    AlgoFinalSelect<cudnnConvolutionBwdFilterAlgoPerf_t>(
+        bwd_filt_results, "backprop-to-filter", workspace_byte, flt);
+    // Backprop-to-Data Algorithm Find/Get() v7
+    auto max_bwd_data_algos = MaxBackwardDataAlgos(s->dnn_handle_);
+    std::vector<cudnnConvolutionBwdDataAlgoPerf_t> bwd_data_results(max_bwd_data_algos);
+    int actual_bwd_data_algos = 0;
+    auto bwd_data_algo_discoverer = param_.cudnn_tune.value() == deconv::kOff ?
+                                        cudnnGetConvolutionBackwardDataAlgorithm_v7 :
+                                        cudnnFindConvolutionBackwardDataAlgorithm;
+    CUDNN_CALL((*bwd_data_algo_discoverer)(s->dnn_handle_,
+                                           filter_desc_,
+                                           in_desc_,
+                                           forward_conv_desc_,  // bwd algo used in inference
+                                           out_desc_,
+                                           bwd_data_results.size(),
+                                           &actual_bwd_data_algos,
+                                           bwd_data_results.data()));
+    bwd_data_results.resize(actual_bwd_data_algos);
+    AlgoFinalSelect<cudnnConvolutionBwdDataAlgoPerf_t>(
+        bwd_data_results, "backprop-to-data", workspace_byte, bwd, exclude_dgrad_algo_);
+
+    // Fix for issue #11241
+    int cudnn_find_issue_max_features = 64 * 1024;
+    // With deconvolution, the algo sensitivity is to a large number of output features
+    if (add_to_weight_ && Features(out_shape[deconv::kOut]) >= cudnn_find_issue_max_features) {
+      flt->Set(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true);
+    }
+  }
+
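+  // Editorial sketch of the Find-vs-Get unification used above: the v7 Get entry points
+  // and the Find entry points share a signature, so the tuning policy collapses into a
+  // single function pointer (names here are illustrative):
+  //   auto discoverer = tuning_off ? cudnnGetConvolutionForwardAlgorithm_v7
+  //                                : cudnnFindConvolutionForwardAlgorithm;
+  //   CUDNN_CALL((*discoverer)(handle, x_desc, w_desc, conv_desc, y_desc,
+  //                            results.size(), &returned_count, results.data()));
+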
+  void SelectAlgo(const RunContext& rctx,
+                  const mxnet::ShapeVector& in_shape,
+                  const mxnet::ShapeVector& out_shape,
+                  cudnnDataType_t cudnn_forward_compute_type,
+                  cudnnDataType_t cudnn_backward_compute_type) {
+    auto algo_setter = [&](CuDNNAlgo<cudnnConvolutionFwdAlgoPerf_t>* fwd,
+                           CuDNNAlgo<cudnnConvolutionBwdDataAlgoPerf_t>* bwd,
+                           CuDNNAlgo<cudnnConvolutionBwdFilterAlgoPerf_t>* flt) {
+      if (param_.cudnn_tune.value() == deconv::kOff) {
+        // The routine will only be calling cudnnGet, so no need to grab the Storage lock.
+        this->CuDNNAlgoSetter(rctx,
+                              in_shape,
+                              out_shape,
+                              cudnn_forward_compute_type,
+                              cudnn_backward_compute_type,
+                              fwd,
+                              bwd,
+                              flt);
+      } else {
+        // One potential problem is that cudnnFind() uses cudaMalloc() to directly allocate
+        // I/O and workspace areas, and these allocations may result in an out-of-memory
+        // error even though the StorageManager free pool is not empty. Ideally, cudnnFind
+        // would use MXNet's storage allocator for its I/O and workspace areas, instead of using
+        // the area carved out by MXNET_GPU_MEM_POOL_RESERVE.
+        // To get somewhat the same effect as this, we can pre-allocate the areas needed for the
+        // I/Os (possibly triggering a desirable StorageManager::ReleaseAll()), followed by a
+        // DirectFree(), which makes these areas available for cudnn's subsequent cudaMalloc().
+
+        // Allocate for x (or dx), w (or dw) and y (or dy).
+        ReserveElements({in_shape[deconv::kData].Size(),
+                         in_shape[deconv::kWeight].Size(),
+                         out_shape[deconv::kOut].Size()});
+
+        // We're about to call cudnnFind so we need to quiet the system by grabbing
+        // the Storage lock. Concurrent cudaMalloc's can disrupt the accurate timing
+        // measurements of the algos, and can prevent the cuda driver's proper freeing
+        // of cudnnFind's internal temporary allocations. Grabbing the lock might also
+        // impede other threads from launching work on the GPU.
+        std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
+        this->CuDNNAlgoSetter(rctx,
+                              in_shape,
+                              out_shape,
+                              cudnn_forward_compute_type,
+                              cudnn_backward_compute_type,
+                              fwd,
+                              bwd,
+                              flt);
+      }
+    };
+
+    // An algo specification by the user may be cached here, but another
+    // convolution will match only if identically specified.
+    // We're caching results of *Get* as well as *Find*, but these records
+    // will be held distinctly because param_.cudnn_tune is part of the key.
+    CuDNNDeconvAlgoReg::Get()->FindOrElseRegister(param_,
+                                                  in_shape,
+                                                  out_shape,
+                                                  dtype_,
+                                                  cudnn_forward_compute_type,
+                                                  cudnn_backward_compute_type,
+                                                  SMArch(rctx.ctx.dev_id),
+                                                  add_to_weight_,
+                                                  &forward_algo_,
+                                                  &back_algo_,
+                                                  &back_algo_w_,
+                                                  algo_setter);
+
+    // If we're allowing Tensor Core variants of the algos to be considered in
+    // *Find*() or *Get*(), but a non-Tensor-Core algo variant is the fastest,
+    // we must change the descriptor to preclude Tensor Core. Simplest is to
+    // once again set the mathType in all cases.
+
+    // The next two code lines will look like they have typos, but they don't!
+    // The forward_conv_desc_ is used during inference, which invokes the back_algo_.
+    // Thus, the mathType of the back_algo_ should be stored in the forward_conv_desc_.
+    // Conversely, the back_conv_desc_ is used during training backprop, which invokes
+    // the forward_algo_. Thus, the mathType of the forward_algo_ should be stored
+    // in the back_conv_desc_.
+    CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, back_algo_.MathType()));
+    CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, forward_algo_.MathType()));
+    CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, back_algo_w_.MathType()));
+  }
+
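+  // Editorial note: FindOrElseRegister() above keys the cached algo triple on the shapes,
+  // dtype, compute types, SM architecture, add_to_weight_, and the full param_ (which
+  // includes cudnn_tune), so Get-selected and Find-selected records never collide.
+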
+  // Look over the results from *Find*() or *Get*() and pick the fastest algo given possible
+  // workspace constraints and a possible user algo preference.
+  template <typename PerfType, typename AlgoType>
+  void AlgoFinalSelect(const std::vector<PerfType>& perf_results,
+                       std::string kernel_name,
+                       size_t workspace_byte,
+                       CuDNNAlgo<AlgoType>* algo,
+                       int32_t algo_exclude = -1) {
+    // Determine the fastest acceptable algo regardless of mathType.
+    bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false);
+    for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) {
+      const auto& result = perf_results[i];
+      bool algo_exclusion = static_cast<int32_t>(result.algo) == algo_exclude;
+      bool algo_is_tensor_core = false;
+      algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH;
+      if (result.status == CUDNN_STATUS_SUCCESS &&
+          (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) &&
+          (param_.cudnn_tune.value() != deconv::kLimited || result.memory <= workspace_byte) &&
+          !algo_exclusion) {
+        algo->Set(result.algo, algo_is_tensor_core);
+        return;
+      }
+    }
+    auto mode = param_.cudnn_tune.value() == deconv::kOff ? " get " : " find ";
+    LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " deconvolution algorithm"
+               << " with workspace size of " << workspace_byte << " bytes,"
+               << " please consider reducing batch/model size or increasing the workspace size";
+  }
+
+  void GetTempSize(const OpContext& ctx) {
+    mshadow::Stream<gpu>* s = ctx.get_stream<gpu>();
+    size_t back_data_algo_workspace_size = 0;
+    size_t back_filter_algo_workspace_size = 0;
+    size_t forward_algo_workspace_size = 0;
+    CUDNN_CALL(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_,
+                                                            filter_desc_,
+                                                            in_desc_,
+                                                            forward_conv_desc_,
+                                                            out_desc_,
+                                                            back_algo_.AlgoNumber(),
+                                                            &back_data_algo_workspace_size));
+    CUDNN_CALL(cudnnGetConvolutionBackwardFilterWorkspaceSize(s->dnn_handle_,
+                                                              out_desc_,
+                                                              in_desc_,
+                                                              back_conv_desc_,
+                                                              filter_desc_,
+                                                              back_algo_w_.AlgoNumber(),
+                                                              &back_filter_algo_workspace_size));
+    CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_,
+                                                       out_desc_,
+                                                       filter_desc_,
+                                                       back_conv_desc_,
+                                                       in_desc_,
+                                                       forward_algo_.AlgoNumber(),
+                                                       &forward_algo_workspace_size));
+
+    forward_workspace_byte_ = back_data_algo_workspace_size;
+    backward_workspace_byte_ =
+        std::max(forward_algo_workspace_size, back_filter_algo_workspace_size);
+  }
+
+  int* CastTShapeToIntPtr(const mxnet::TShape& s, std::vector<int>* buffer) {
+    buffer->resize(s.ndim());
+    nnvm::ShapeTypeCast(s.begin(), s.end(), buffer->data());
+    return buffer->data();
+  }
+
+  // Converts a TBlob to a dptr, checking for the expected dim and that it's contiguous.
+  DType* GetNdPtr(const TBlob& tb, int dim, Stream<gpu>* s) {
+    DType* data_ptr = nullptr;
+    if (dim == 3) {
+      Tensor<gpu, 3, DType> data = tb.get<gpu, 3, DType>(s);
+      CHECK_EQ(data.CheckContiguous(), true);
+      data_ptr = data.dptr_;
+    } else if (dim == 4) {
+      Tensor<gpu, 4, DType> data = tb.get<gpu, 4, DType>(s);
+      CHECK_EQ(data.CheckContiguous(), true);
+      data_ptr = data.dptr_;
+    } else if (dim == 5) {
+      Tensor<gpu, 5, DType> data = tb.get<gpu, 5, DType>(s);
+      CHECK_EQ(data.CheckContiguous(), true);
+      data_ptr = data.dptr_;
+    } else {
+      LOG(FATAL) << "Unexpected Tensor size " << dim << ", supporting only 3, 4 or 5.";
+    }
+    return data_ptr;
+  }
+
+  // Converts a mxnet::TShape to a Shape<> of strides.
+  // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1}
+  template <int dim>
+  inline Shape<dim> Strides(const mxnet::TShape& s) {
+    int ndim = s.ndim();
+    mxnet::TShape strides(ndim, -1);
+    for (int i = 0; i != ndim; ++i)
+      strides[i] = s.ProdShape(i + 1, ndim);
+    return strides.get<dim>();
+  }
+
+  void InitBufferForParam() {
+    CastTShapeToIntPtr(param_.stride, &param_stride_);
+    CastTShapeToIntPtr(param_.dilate, &param_dilate_);
+  }
+
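+  // Editorial worked example for Strides<dim>() above: an NCHW shape {2, 3, 4, 5}
+  // yields strides {60, 20, 5, 1}, since strides[i] = prod(shape[i+1], ..., shape[ndim-1]).
+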
+  // Allocates a 1D Tensor of words with size in bytes >= `size_bytes`.
+  // Always allocates at least one word.
+  mshadow::Tensor<gpu, 1, DType> AllocateTempWorkspace(const OpContext& ctx, size_t size_bytes) {
+    mshadow::Stream<gpu>* s = ctx.get_stream<gpu>();
+    size_t size_words = size_bytes / sizeof(DType) + 1;
+    return ctx.requested[deconv::kTempSpace].get_space_typed<gpu, 1, DType>(
+        mshadow::Shape1(size_words), s);
+  }
+
+  // Returns the size in bytes of the 1D Tensor of words.
+  size_t TensorSizeBytes(const mshadow::Tensor<gpu, 1, DType>& tensor) {
+    return tensor.MSize() * sizeof(DType);
+  }
+
+  // Given a tensor shape of this operation, return the number of features 'c'
+  int64_t Features(const mxnet::TShape& dshape) {
+    int c = 0;
+    switch (dshape.ndim()) {
+      case 3:
+        c = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW)[1];
+        break;
+      case 4:
+        c = ConvertLayout(dshape.get<4>(), param_.layout.value(), kNCHW)[1];
+        break;
+      case 5:
+        c = ConvertLayout(dshape.get<5>(), param_.layout.value(), kNCDHW)[1];
+        break;
+      default:
+        LOG(FATAL) << "Unexpected deconvolution data dimension " << dshape.ndim();
+    }
+    return c;
+  }
+
+  // Make a number of allocations and directly free them, ensuring room for an equivalent set of
+  // cudaMalloc() calls by (say) cudnnFind(). `elements` spec the alloc size in DTypes, not bytes.
+  void ReserveElements(const std::vector<size_t>& elements) {
+    std::vector<Storage::Handle> handles;
+    for (size_t alloc_element : elements) {
+      handles.push_back(Storage::Get()->Alloc(alloc_element * sizeof(DType), Context::GPU()));
+      handles.back().profiler_scope = ":";
+      handles.back().name = "reserve_elements";
+    }
+    for (auto& handle : handles)
+      Storage::Get()->DirectFree(handle);
+  }
+
+  // Log that no suitable algo was found that met the workspace constraints, then exit.
+  void LogNoSuitableAlgoAndExit(int num_algos_tried,
+                                size_t min_memory_needs,
+                                size_t workspace_byte,
+                                std::string algo_kind) {
+    LOG(FATAL) << num_algos_tried << " " << algo_kind << " with minimum memory requirement "
+               << min_memory_needs << " bytes have been tried. Workspace size is set to "
+               << workspace_byte << " bytes, please consider reducing the batch/model size, "
+               << "or increasing workspace size.";
+  }
+
+  std::vector<int> param_stride_;
+  std::vector<int> param_dilate_;
+
+  int forward_compute_type_;
+  int backward_compute_type_;
+  const mxnet::ShapeVector in_shapes_;
+  const mxnet::ShapeVector out_shapes_;
+
+  // Temp workspace size in bytes needed for Forward() operation. Note that
+  // in deconvolution, this is handled by the cuDNN backprop-to-data kernel.
+  size_t forward_workspace_byte_;
+  // Temp workspace size in bytes needed for Backward() operation. Note that
+  // in deconvolution, this is handled by the cuDNN forward kernel and
+  // the cuDNN backprop-to-filter kernel.
+  size_t backward_workspace_byte_;
+  size_t data_offset_;
+  size_t out_offset_;
+  size_t weight_offset_;
+  size_t bias_offset_;
+  cudnnDataType_t dtype_;
+  cudnnTensorDescriptor_t in_desc_;
+  cudnnTensorDescriptor_t out_desc_;
+  cudnnTensorDescriptor_t bias_desc_;
+  cudnnFilterDescriptor_t filter_desc_;
+  // Convolution descriptor for "forward" inference operation.
+  // Note that in deconvolution, the forward operation is handled
+  // by the cuDNN backprop-to-data kernel.
+  cudnnConvolutionDescriptor_t forward_conv_desc_;
+  // Convolution descriptor for "back-prop" operations to data.
+  // Note that in deconvolution, the backprop-to-data operation is handled
+  // by the cuDNN forward kernel.
+  cudnnConvolutionDescriptor_t back_conv_desc_;
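+  // Editorial summary of the kernel/descriptor pairing in deconvolution:
+  //   deconv Forward()            -> cuDNN backprop-to-data kernel   (forward_conv_desc_, back_algo_)
+  //   deconv Backward() to data   -> cuDNN forward kernel            (back_conv_desc_, forward_algo_)
+  //   deconv Backward() to filter -> cuDNN backprop-to-filter kernel (back_conv_desc_w_, back_algo_w_)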
+  // Convolution descriptor for "back-prop" operations to filter.
+  // Note that in deconvolution, the backprop-to-filter operation is handled
+  // by the backprop-to-filter kernel (so consistent with the treatment
+  // in convolution).
+  cudnnConvolutionDescriptor_t back_conv_desc_w_;
+  // Algorithm for the cuDNN forward kernel (used in gradient backprop to input)
+  CuDNNAlgo<cudnnConvolutionFwdAlgoPerf_t> forward_algo_;
+  // Algorithm for the cuDNN backprop-to-data kernel (used in inference)
+  CuDNNAlgo<cudnnConvolutionBwdDataAlgoPerf_t> back_algo_;
+  // Algorithm for the cuDNN backprop-to-filter kernel
+  CuDNNAlgo<cudnnConvolutionBwdFilterAlgoPerf_t> back_algo_w_;
+  cudnnTensorFormat_t format_;
+  // Allow TensorCore algo policy
+  bool cudnn_tensor_core_;
+  // Is req[kWeight] == deconv::kAddTo ?
+  bool add_to_weight_;
+  // Is there a dgrad algo that should be avoided (-1 == none)?
+  int32_t exclude_dgrad_algo_ = -1;
+  DeconvolutionParam param_;
+};
+#endif  // CUDNN
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_NN_CUDNN_CUDNN_DECONVOLUTION_INL_H_
diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h
index b807234e1d7b..ad7872025ee9 100644
--- a/src/operator/nn/cudnn/cudnn_pooling-inl.h
+++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h
@@ -49,8 +49,8 @@ class CuDNNPoolingOp {
     param_ = p;
     switch (param_.pool_type) {
       case pool_enum::kMaxPooling:
-        mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC
-                                                                 : CUDNN_POOLING_MAX;
+        mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC :
+                CUDNN_POOLING_MAX;
         break;
       case pool_enum::kAvgPooling:
         if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {
@@ -210,8 +210,8 @@ class CuDNNPoolingOp {
     // Perform shape calculations in a standard (NCHW) layout space
     mshadow::Shape<4> input_shape = input.shape_.get<4>();
     mshadow::Shape<4> dshape_nchw =
-        (layout == mshadow::kNHWC) ? ConvertLayout(input_shape, mshadow::kNHWC, mshadow::kNCHW)
-                                   : input_shape;
+        (layout == mshadow::kNHWC) ? ConvertLayout(input_shape, mshadow::kNHWC, mshadow::kNCHW) :
+        input_shape;
     int kernel_height = param.global_pool ? dshape_nchw[2] : param.kernel[0];
     int kernel_width = param.global_pool ? dshape_nchw[3] : param.kernel[1];
     if (kernel_height > 8 || kernel_width > 8)
@@ -258,11 +258,11 @@ class CuDNNPoolingOp {
     Tensor<gpu, 4, DType> out = out_data.get<gpu, 4, DType>(s);
     // Perform shape calculations in a standard (NCHW) layout space
     mshadow::Shape<4> dshape_nchw =
-        (layout == mshadow::kNHWC) ? ConvertLayout(data.shape_, mshadow::kNHWC, mshadow::kNCHW)
-                                   : data.shape_;
+        (layout == mshadow::kNHWC) ? ConvertLayout(data.shape_, mshadow::kNHWC, mshadow::kNCHW) :
+        data.shape_;
     mshadow::Shape<4> oshape_nchw =
-        (layout == mshadow::kNHWC) ? ConvertLayout(out.shape_, mshadow::kNHWC, mshadow::kNCHW)
-                                   : out.shape_;
+        (layout == mshadow::kNHWC) ? ConvertLayout(out.shape_, mshadow::kNHWC, mshadow::kNCHW) :
+        out.shape_;
     CUDNN_CALL(cudnnSetTensor4dDescriptor(in_desc_,
                                           cudnn_layout,
                                           dtype_,
@@ -314,18 +314,18 @@ class CuDNNPoolingOp {
                                           oshape.ProdShape(5, 5));
 
     // Convert to a standard (NCDHW) layout space to create args for cuDNN
-    mshadow::Shape<5> dshape_ncdhw = (layout == mshadow::kNDHWC)
-                                         ? ConvertLayout(dshape, mshadow::kNDHWC, mshadow::kNCDHW)
-                                         : dshape;
+    mshadow::Shape<5> dshape_ncdhw = (layout == mshadow::kNDHWC) ?
+                                         ConvertLayout(dshape, mshadow::kNDHWC, mshadow::kNCDHW) :
+                                         dshape;
     mshadow::Shape<5> dstride_ncdhw =
-        (layout == mshadow::kNDHWC) ? ConvertLayout(dstride, mshadow::kNDHWC, mshadow::kNCDHW)
-                                    : dstride;
-    mshadow::Shape<5> oshape_ncdhw = (layout == mshadow::kNDHWC)
-                                         ?
ConvertLayout(oshape, mshadow::kNDHWC, mshadow::kNCDHW) - : oshape; + (layout == mshadow::kNDHWC) ? ConvertLayout(dstride, mshadow::kNDHWC, mshadow::kNCDHW) : + dstride; + mshadow::Shape<5> oshape_ncdhw = (layout == mshadow::kNDHWC) ? + ConvertLayout(oshape, mshadow::kNDHWC, mshadow::kNCDHW) : + oshape; mshadow::Shape<5> ostride_ncdhw = - (layout == mshadow::kNDHWC) ? ConvertLayout(ostride, mshadow::kNDHWC, mshadow::kNCDHW) - : ostride; + (layout == mshadow::kNDHWC) ? ConvertLayout(ostride, mshadow::kNDHWC, mshadow::kNCDHW) : + ostride; // Create int arrays for passing into cuDNN std::array dshape_ncdhw_int, dstride_ncdhw_int, oshape_ncdhw_int, ostride_ncdhw_int; for (int i = 0; i < 5; ++i) { @@ -335,12 +335,12 @@ class CuDNNPoolingOp { ostride_ncdhw_int[i] = static_cast(ostride_ncdhw[i]); } - std::array kernel_vec = {param_.global_pool ? static_cast(dshape_ncdhw[2]) - : static_cast(param_.kernel[0]), - param_.global_pool ? static_cast(dshape_ncdhw[3]) - : static_cast(param_.kernel[1]), - param_.global_pool ? static_cast(dshape_ncdhw[4]) - : static_cast(param_.kernel[2])}; + std::array kernel_vec = {param_.global_pool ? static_cast(dshape_ncdhw[2]) : + static_cast(param_.kernel[0]), + param_.global_pool ? static_cast(dshape_ncdhw[3]) : + static_cast(param_.kernel[1]), + param_.global_pool ? static_cast(dshape_ncdhw[4]) : + static_cast(param_.kernel[2])}; std::array pad_vec = {param_.global_pool ? 0 : static_cast(param_.pad[0]), param_.global_pool ? 0 : static_cast(param_.pad[1]), diff --git a/src/operator/nn/dnnl/dnnl_base-inl.h b/src/operator/nn/dnnl/dnnl_base-inl.h index 4bf8b372b4ef..3ec2e32750b8 100644 --- a/src/operator/nn/dnnl/dnnl_base-inl.h +++ b/src/operator/nn/dnnl/dnnl_base-inl.h @@ -607,9 +607,9 @@ class DNNLMemory { dnnl::memory::data_type data_type = dnnl::memory::data_type::undef) const { dnnl::memory::dims dims(desc.data.dims, desc.data.dims + desc.data.ndims); dnnl::memory::data_type cpp_type = - (data_type == dnnl::memory::data_type::undef) - ? static_cast(desc.data.data_type) - : data_type; + (data_type == dnnl::memory::data_type::undef) ? + static_cast(desc.data.data_type) : + data_type; dnnl::memory::desc data_md(dims, cpp_type, static_cast(format)); return data_md; } diff --git a/src/operator/nn/dnnl/dnnl_base.cc b/src/operator/nn/dnnl/dnnl_base.cc index d1e8918c3bde..54af44c80fe4 100644 --- a/src/operator/nn/dnnl/dnnl_base.cc +++ b/src/operator/nn/dnnl/dnnl_base.cc @@ -242,31 +242,30 @@ const dnnl::memory* GetWeights(const NDArray& arr, int num_groups) { tz = dnnl::memory::dims{arr.shape()[O], arr.shape()[I]}; format_tag = dnnl::memory::format_tag::oi; } else if (ndim == 3) { - tz = num_groups > 1 ? dnnl::memory::dims{num_groups, - arr.shape()[O] / num_groups, - arr.shape()[I], - arr.shape()[H]} - : dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H]}; + tz = num_groups > 1 ? + dnnl::memory::dims{ + num_groups, arr.shape()[O] / num_groups, arr.shape()[I], arr.shape()[H]} : + dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H]}; format_tag = num_groups > 1 ? dnnl::memory::format_tag::goiw : dnnl::memory::format_tag::oiw; } else if (ndim == 4) { - tz = num_groups > 1 - ? dnnl::memory::dims{num_groups, - arr.shape()[O] / num_groups, - arr.shape()[I], - arr.shape()[H], - arr.shape()[W]} - : dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H], arr.shape()[W]}; + tz = num_groups > 1 ? 
+ dnnl::memory::dims{num_groups, + arr.shape()[O] / num_groups, + arr.shape()[I], + arr.shape()[H], + arr.shape()[W]} : + dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H], arr.shape()[W]}; format_tag = num_groups > 1 ? dnnl::memory::format_tag::goihw : dnnl::memory::format_tag::oihw; } else if (ndim == 5) { - tz = num_groups > 1 - ? dnnl::memory::dims{num_groups, - arr.shape()[O] / num_groups, - arr.shape()[I], - arr.shape()[D], - arr.shape()[H], - arr.shape()[W]} - : dnnl::memory::dims{ - arr.shape()[O], arr.shape()[I], arr.shape()[D], arr.shape()[H], arr.shape()[W]}; + tz = num_groups > 1 ? + dnnl::memory::dims{num_groups, + arr.shape()[O] / num_groups, + arr.shape()[I], + arr.shape()[D], + arr.shape()[H], + arr.shape()[W]} : + dnnl::memory::dims{ + arr.shape()[O], arr.shape()[I], arr.shape()[D], arr.shape()[H], arr.shape()[W]}; format_tag = num_groups > 1 ? dnnl::memory::format_tag::goidhw : dnnl::memory::format_tag::oidhw; } else { diff --git a/src/operator/nn/dnnl/dnnl_convolution.cc b/src/operator/nn/dnnl/dnnl_convolution.cc index 9754f7fa4505..7910f65d21eb 100644 --- a/src/operator/nn/dnnl/dnnl_convolution.cc +++ b/src/operator/nn/dnnl/dnnl_convolution.cc @@ -53,8 +53,8 @@ std::shared_ptr GetConvFwdImpl( auto weight_md = GetWeightDesc(weights, param.conv_param.num_group, param.dnnl_param.quantized); auto out_md = GetMemDesc(output); auto bias_md = - bias ? (param.dnnl_param.quantized ? GetMemDesc(*bias, mshadow::kInt32) : GetMemDesc(*bias)) - : dnnl::memory::desc{{}, dnnl::memory::data_type::undef, dnnl::memory::format_tag::any}; + bias ? (param.dnnl_param.quantized ? GetMemDesc(*bias, mshadow::kInt32) : GetMemDesc(*bias)) : + dnnl::memory::desc{{}, dnnl::memory::data_type::undef, dnnl::memory::format_tag::any}; auto bias_md_ptr = bias ? &bias_md : nullptr; dnnl::memory::dims strides(param.conv_param.kernel.ndim()); diff --git a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h index 301537967df3..92c1d6bed1f2 100644 --- a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h +++ b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h @@ -289,9 +289,9 @@ inline const dnnl::memory* DNNLDeconvBwd::OutGradMem(const NDArray& out_grad) co inline const dnnl::memory* DNNLDeconvBwd::OutGradMem(const NDArray& out_grad, const dnnl::memory* const out_grad_mem) const { - return (out_grad_mem && out_grad_mem->get_desc() == bwd_weights_pd->diff_dst_desc()) - ? out_grad_mem - : out_grad.GetDNNLDataReorder(bwd_weights_pd->diff_dst_desc()); + return (out_grad_mem && out_grad_mem->get_desc() == bwd_weights_pd->diff_dst_desc()) ? + out_grad_mem : + out_grad.GetDNNLDataReorder(bwd_weights_pd->diff_dst_desc()); } inline dnnl_output_t DNNLDeconvBwd::DataGradMem(const OpReqType req, @@ -315,8 +315,8 @@ inline dnnl_output_t DNNLDeconvBwd::WeightsGradMem(const uint32_t num_group, inline dnnl_output_t DNNLDeconvBwd::BiasGradMem(const OpReqType req, const NDArray* const bias) const { - return bias ? CreateDNNLMem(*bias, bwd_weights_pd->diff_bias_desc(), req) - : dnnl_output_t(OutDataOp::Noop, nullptr); + return bias ? 
CreateDNNLMem(*bias, bwd_weights_pd->diff_bias_desc(), req) : + dnnl_output_t(OutDataOp::Noop, nullptr); } // Utility class for creating operation descriptors of deconvolution primitives diff --git a/src/operator/nn/dnnl/dnnl_fully_connected.cc b/src/operator/nn/dnnl/dnnl_fully_connected.cc index 5bb3c9d79ec0..7879497954ae 100644 --- a/src/operator/nn/dnnl/dnnl_fully_connected.cc +++ b/src/operator/nn/dnnl/dnnl_fully_connected.cc @@ -39,9 +39,9 @@ dnnl::inner_product_forward::primitive_desc GetFCFwdImpl(const DNNLFCFullParam& const dnnl::memory::desc& out_md) { auto engine = CpuEngine::Get()->get_engine(); auto data_md = GetMemDesc(data); - auto weight_md = full_param.dnnl_param.quantized - ? GetFCWeightDesc(weight, data.shape()[0], mshadow::kInt8) - : GetFCWeightDesc(weight, data.shape()[0]); + auto weight_md = full_param.dnnl_param.quantized ? + GetFCWeightDesc(weight, data.shape()[0], mshadow::kInt8) : + GetFCWeightDesc(weight, data.shape()[0]); auto propagation = is_train ? dnnl::prop_kind::forward_training : dnnl::prop_kind::forward_scoring; diff --git a/src/operator/nn/dnnl/dnnl_rnn.cc b/src/operator/nn/dnnl/dnnl_rnn.cc index 844bad99c845..5ebad89089c3 100644 --- a/src/operator/nn/dnnl/dnnl_rnn.cc +++ b/src/operator/nn/dnnl/dnnl_rnn.cc @@ -184,9 +184,9 @@ RnnPrimitive GetRnnFwdPrim(const DNNLRnnLayerParam& layer_param, memory::data_type data_type = get_dnnl_type(data.dtype()); memory::data_type weight_type = get_dnnl_type(params.dtype()); const prop_kind prop = is_train ? prop_kind::forward_training : prop_kind::forward_inference; - const rnn_direction dnnl_rnn_direction = layer_param.bidirectional - ? rnn_direction::bidirectional_concat - : rnn_direction::unidirectional; + const rnn_direction dnnl_rnn_direction = layer_param.bidirectional ? + rnn_direction::bidirectional_concat : + rnn_direction::unidirectional; auto src_layer_desc = memory::desc(layer_param.src_dims, data_type, tag::tnc); auto weight_layer_desc = memory::desc(layer_param.weight_layer_dims, weight_type, tag::any); @@ -196,15 +196,15 @@ RnnPrimitive GetRnnFwdPrim(const DNNLRnnLayerParam& layer_param, auto src_state_desc = memory::desc(layer_param.state_dims, data_type, tag::ldnc); auto src_cell_desc = memory::desc(layer_param.cell_dims, data_type, tag::ldnc); auto weight_peep_desc = memory::desc(); - auto weight_proj_desc = layer_param.proj_size > 0 - ? memory::desc(layer_param.weight_proj_dims, weight_type, tag::any) - : memory::desc(); - auto dst_state_desc = layer_param.state_outputs - ? memory::desc(layer_param.state_dims, data_type, tag::ldnc) - : memory::desc(); - auto dst_cell_desc = layer_param.state_outputs - ? memory::desc(layer_param.cell_dims, data_type, tag::ldnc) - : memory::desc(); + auto weight_proj_desc = layer_param.proj_size > 0 ? + memory::desc(layer_param.weight_proj_dims, weight_type, tag::any) : + memory::desc(); + auto dst_state_desc = layer_param.state_outputs ? + memory::desc(layer_param.state_dims, data_type, tag::ldnc) : + memory::desc(); + auto dst_cell_desc = layer_param.state_outputs ? + memory::desc(layer_param.cell_dims, data_type, tag::ldnc) : + memory::desc(); auto fwd = RnnPrimitive(); switch (mode) { @@ -265,8 +265,9 @@ RnnBwdPrimitive GetRnnBwdPrim(const DNNLRnnForwardTraining& fwd, memory::data_type data_type = get_dnnl_type(data.dtype()); memory::data_type weight_type = get_dnnl_type(params.dtype()); const prop_kind prop = prop_kind::backward; - rnn_direction dnnl_rnn_direction = layer_param.bidirectional ? 
rnn_direction::bidirectional_concat - : rnn_direction::unidirectional; + rnn_direction dnnl_rnn_direction = layer_param.bidirectional ? + rnn_direction::bidirectional_concat : + rnn_direction::unidirectional; auto src_layer_desc = memory::desc(layer_param.src_dims, data_type, tag::tnc); auto weight_layer_desc = memory::desc(layer_param.weight_layer_dims, weight_type, tag::any); @@ -274,9 +275,9 @@ RnnBwdPrimitive GetRnnBwdPrim(const DNNLRnnForwardTraining& fwd, auto bias_desc = memory::desc(layer_param.bias_dims, data_type, tag::ldgo); auto dst_layer_desc = memory::desc(layer_param.dst_dims, data_type, tag::tnc); auto src_state_desc = memory::desc(layer_param.state_dims, data_type, tag::ldnc); - auto dst_state_desc = layer_param.state_outputs - ? memory::desc(layer_param.state_dims, data_type, tag::ldnc) - : memory::desc(); + auto dst_state_desc = layer_param.state_outputs ? + memory::desc(layer_param.state_dims, data_type, tag::ldnc) : + memory::desc(); const void* fwd_pd = fwd.GetPrimDesc(); auto bwd = RnnBwdPrimitive(); @@ -1125,9 +1126,9 @@ void DNNLRnnOp::Forward(const OpContext& ctx, const int seq_length = default_param.seq_length_; const int batch_size = default_param.batch_size_; const int state_size = default_param.state_size; - const int iter_size = default_param.projection_size.has_value() - ? default_param.projection_size.value() - : default_param.state_size; + const int iter_size = default_param.projection_size.has_value() ? + default_param.projection_size.value() : + default_param.state_size; const int directions = default_param.bidirectional ? 2 : 1; dnnl::memory::desc dst_desc({seq_length, batch_size, directions * iter_size}, get_dnnl_type(data_dtype), diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h index 898309579054..30ad7aa01b54 100644 --- a/src/operator/nn/pooling-inl.h +++ b/src/operator/nn/pooling-inl.h @@ -296,9 +296,9 @@ class PoolingOp { } stride = mxnet::TShape(ishape.ndim() - 2, 1); } - const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) - ? param_.p_value.value() - : 1; + const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ? + param_.p_value.value() : + 1; const bool count_include_pad = (param_.count_include_pad.has_value()) ? param_.count_include_pad.value() : true; switch (p_value) { @@ -377,9 +377,9 @@ class PoolingOp { stride = mxnet::TShape(ishape.ndim() - 2, 1); } - const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) - ? param_.p_value.value() - : 1; + const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ? + param_.p_value.value() : + 1; const bool count_include_pad = (param_.count_include_pad.has_value()) ? param_.count_include_pad.value() : true; switch (p_value) { diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 47114f8cc897..8fe054b54f89 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -157,8 +157,8 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, CHECK(layout == mshadow::kNCW || layout == mshadow::kNWC) << "Need 1D layout"; // Perform shape calculations in a standard (NCW) layout space mshadow::Shape<3> dshape_ncw = - (layout == mshadow::kNWC) ? ConvertLayout(dshape.get<3>(), mshadow::kNWC, mshadow::kNCW) - : dshape.get<3>(); + (layout == mshadow::kNWC) ? 
ConvertLayout(dshape.get<3>(), mshadow::kNWC, mshadow::kNCW) : + dshape.get<3>(); mshadow::Shape<3> oshape_ncw = dshape_ncw; CHECK(param.kernel[0] <= dshape_ncw[2] + 2 * param.pad[0]) << "kernel size (" << param.kernel[0] << ") exceeds input (" << dshape[2] << " padded to " @@ -175,9 +175,9 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, std::ceil(static_cast(dshape_ncw[2] + 2 * param.pad[0]) / param.stride[0])); } // Convert back from standard (NCW) layout space to the actual layout type - mxnet::TShape oshape = (layout == mshadow::kNWC) - ? ConvertLayout(oshape_ncw, mshadow::kNCW, mshadow::kNWC) - : oshape_ncw; + mxnet::TShape oshape = (layout == mshadow::kNWC) ? + ConvertLayout(oshape_ncw, mshadow::kNCW, mshadow::kNWC) : + oshape_ncw; out_shape->clear(); out_shape->push_back(oshape); // save output shape #if MXNET_USE_ONEDNN == 1 @@ -189,8 +189,9 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, CHECK(layout == mshadow::kNCHW || layout == mshadow::kNHWC) << "Need 2D layout"; // Perform shape calculations in a standard (NCHW) layout space mshadow::Shape<4> dshape_nchw = - (layout == mshadow::kNHWC) ? ConvertLayout(dshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW) - : dshape.get<4>(); + (layout == mshadow::kNHWC) ? + ConvertLayout(dshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW) : + dshape.get<4>(); mshadow::Shape<4> oshape_nchw = dshape_nchw; CHECK(param.kernel[0] <= dshape_nchw[2] + 2 * param.pad[0]) << "kernel size (" << param.kernel[0] << ") exceeds input (" << dshape_nchw[2] @@ -212,9 +213,9 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, param.stride[1])); } // Convert back from standard (NCHW) layout space to the actual layout type - mxnet::TShape oshape = (layout == mshadow::kNHWC) - ? ConvertLayout(oshape_nchw, mshadow::kNCHW, mshadow::kNHWC) - : oshape_nchw; + mxnet::TShape oshape = (layout == mshadow::kNHWC) ? + ConvertLayout(oshape_nchw, mshadow::kNCHW, mshadow::kNHWC) : + oshape_nchw; out_shape->clear(); out_shape->push_back(oshape); // save output shape #if MXNET_USE_ONEDNN == 1 @@ -226,9 +227,9 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, CHECK(layout == mshadow::kNCDHW || layout == mshadow::kNDHWC) << "Need 3D layout"; // Perform shape calculations in a standard (NCDHW) layout space mshadow::Shape<5> dshape_ncdhw = - (layout == mshadow::kNDHWC) - ? ConvertLayout(dshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW) - : dshape.get<5>(); + (layout == mshadow::kNDHWC) ? + ConvertLayout(dshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW) : + dshape.get<5>(); mshadow::Shape<5> oshape_ncdhw = dshape_ncdhw; CHECK_LE(param.kernel[0], dshape_ncdhw[2] + 2 * param.pad[0]) << "kernel size exceeds input"; CHECK_LE(param.kernel[1], dshape_ncdhw[3] + 2 * param.pad[1]) << "kernel size exceeds input"; @@ -255,9 +256,9 @@ static bool PoolingShape(const nnvm::NodeAttrs& attrs, param.stride[2])); } // Convert back from standard (NCDHW) layout space to the actual layout type - mxnet::TShape oshape = (layout == mshadow::kNDHWC) - ? ConvertLayout(oshape_ncdhw, mshadow::kNCDHW, mshadow::kNDHWC) - : oshape_ncdhw; + mxnet::TShape oshape = (layout == mshadow::kNDHWC) ? 
+ ConvertLayout(oshape_ncdhw, mshadow::kNCDHW, mshadow::kNDHWC) : + oshape_ncdhw; out_shape->clear(); out_shape->push_back(oshape); // save output shape #if MXNET_USE_ONEDNN == 1 diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h index 2787e419a156..9ee41cb8f9a6 100644 --- a/src/operator/nn/softmax-inl.h +++ b/src/operator/nn/softmax-inl.h @@ -284,16 +284,16 @@ inline void SoftmaxGrad(Stream* s, DType final_result; if (temperature == 1.0) { for (index_t j = 0; j < M; ++j) { - final_result = negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) - : OP2::Map(ograd[base + j * sa], out[base + j * sa], sum); + final_result = negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) : + OP2::Map(ograd[base + j * sa], out[base + j * sa], sum); final_result = (j < len) ? final_result : DType(0.0f); KERNEL_ASSIGN(igrad[base + j * sa], Req, final_result); } } else { for (index_t j = 0; j < M; ++j) { final_result = - negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature - : OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature; + negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature : + OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature; final_result = (j < len) ? final_result : DType(0.0f); KERNEL_ASSIGN(igrad[base + j * sa], Req, final_result); } @@ -314,15 +314,15 @@ inline void SoftmaxGrad(Stream* s, DType final_result; if (temperature == 1.0) { for (index_t j = 0; j < M; ++j) { - final_result = negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) - : OP2::Map(ograd[base + j * sa], out[base + j * sa], sum); + final_result = negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) : + OP2::Map(ograd[base + j * sa], out[base + j * sa], sum); KERNEL_ASSIGN(igrad[base + j * sa], Req, final_result); } } else { for (index_t j = 0; j < M; ++j) { final_result = - negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature - : OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature; + negate ? -OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature : + OP2::Map(ograd[base + j * sa], out[base + j * sa], sum) / temperature; KERNEL_ASSIGN(igrad[base + j * sa], Req, final_result); } } @@ -449,9 +449,9 @@ __global__ void masked_softmax_kernel(DType* in, for (index_t i = x; i < M; i += x_size) { val = (negate ? -in[base + i * sa] : in[base + i * sa]); bool mask_value = bcst_mask_axis ? in_mask[base_mask] : in_mask[base_mask + i * sa_mask]; - out[base + i * sa] = mask_value - ? DType(OP::Map((val - smax) / static_cast(temperature), ssum)) - : DType(masked_value); + out[base + i * sa] = mask_value ? + DType(OP::Map((val - smax) / static_cast(temperature), ssum)) : + DType(masked_value); } } @@ -578,8 +578,8 @@ __global__ void masked_softmax_stride1_kernel(const DType* in, masked_value = -INFINITY; for (index_t i = my_id; i < M; i += threads_per_row) { const DType val = (negate ? -row[i] : row[i]); - row[i] = row_mask[i] ? DType(OP::Map((val - smax) / static_cast(temperature), ssum)) - : DType(masked_value); + row[i] = row_mask[i] ? DType(OP::Map((val - smax) / static_cast(temperature), ssum)) : + DType(masked_value); } __syncthreads(); @@ -852,8 +852,8 @@ __global__ void masked_softmax_grad_kernel(OType* out, DType final_result; for (index_t i = x; i < M; i += x_size) { bool mask_value = bcst_mask_axis ? in_mask[base_mask] : in_mask[base_mask + i * sa_mask]; - final_result = negate ? 
-OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum) - : OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum); + final_result = negate ? -OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum) : + OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum); final_result = mask_value ? final_result / static_cast(temperature) : DType(0.0f); KERNEL_ASSIGN(igrad[base + i * sa], Req, final_result); } diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc index 8c88d53de939..5b9c4ae41a46 100644 --- a/src/operator/nn/softmax.cc +++ b/src/operator/nn/softmax.cc @@ -140,9 +140,9 @@ Example:: [](const NodeAttrs& attrs) { const SoftmaxParam& param = nnvm::get(attrs.parsed); - return (param.use_length.value()) - ? std::vector{"data", "length"} - : std::vector{"data"}; + return (param.use_length.value()) ? + std::vector{"data", "length"} : + std::vector{"data"}; }) .set_attr("FListOutputNames", [](const NodeAttrs& attrs) { diff --git a/src/operator/npx_control_flow.cc b/src/operator/npx_control_flow.cc index a1dd419513e9..0e154d3f1354 100644 --- a/src/operator/npx_control_flow.cc +++ b/src/operator/npx_control_flow.cc @@ -720,9 +720,9 @@ static void WhileLoopGradComputeExCPU(const OpStatePtr& state_ptr, } if (i < (size_t)params.num_args) { // a var - igrads[i] = (step == 0) - ? outputs[i] - : NDArray(outputs[i].shape(), outputs[i].ctx(), true, outputs[i].dtype()); + igrads[i] = (step == 0) ? + outputs[i] : + NDArray(outputs[i].shape(), outputs[i].ctx(), true, outputs[i].dtype()); iter_req[i] = (step == 0 || req[i] == kNullOp) ? req[i] : kWriteTo; ++i; diff --git a/src/operator/numpy/linalg/np_lstsq.cc b/src/operator/numpy/linalg/np_lstsq.cc index cfcfa4b3c4d3..2b867bc8134c 100644 --- a/src/operator/numpy/linalg/np_lstsq.cc +++ b/src/operator/numpy/linalg/np_lstsq.cc @@ -54,9 +54,9 @@ inline bool LstsqOpType(const nnvm::NodeAttrs& attrs, CHECK(b_type == mshadow::kFloat32 || b_type == mshadow::kFloat64) << "lstsq operation only supports 32-bit and 64-bit floating point"; - const mshadow::TypeFlag floatFlag = (mshadow::kFloat32 == a_type && mshadow::kFloat32 == b_type) - ? mshadow::kFloat32 - : mshadow::kFloat64; + const mshadow::TypeFlag floatFlag = (mshadow::kFloat32 == a_type && mshadow::kFloat32 == b_type) ? + mshadow::kFloat32 : + mshadow::kFloat64; TYPE_ASSIGN_CHECK(*out_attrs, 0, floatFlag); TYPE_ASSIGN_CHECK(*out_attrs, 1, floatFlag); TYPE_ASSIGN_CHECK(*out_attrs, 2, index_type_flag); diff --git a/src/operator/numpy/linalg/np_norm.cc b/src/operator/numpy/linalg/np_norm.cc index 735a6655b0b5..9838c9f59e39 100644 --- a/src/operator/numpy/linalg/np_norm.cc +++ b/src/operator/numpy/linalg/np_norm.cc @@ -165,8 +165,8 @@ bool NumpyNormShape(const nnvm::NodeAttrs& attrs, } else { TShape axis(param.axis.value().ndim(), 0); for (int i = 0; i < param.axis.value().ndim(); ++i) { - axis[i] = param.axis.value()[i] < 0 ? (*in_attrs)[0].ndim() + param.axis.value()[i] - : param.axis.value()[i]; + axis[i] = param.axis.value()[i] < 0 ? (*in_attrs)[0].ndim() + param.axis.value()[i] : + param.axis.value()[i]; } const_cast(param).axis = axis; if (param.axis.value().ndim() == 2) { diff --git a/src/operator/numpy/np_bincount_op.cc b/src/operator/numpy/np_bincount_op.cc index 6ede3a69f721..13d1c880fcf3 100644 --- a/src/operator/numpy/np_bincount_op.cc +++ b/src/operator/numpy/np_bincount_op.cc @@ -114,9 +114,9 @@ NNVM_REGISTER_OP(_npi_bincount) [](const NodeAttrs& attrs) { const NumpyBincountParam& params = nnvm::get(attrs.parsed); - return params.has_weights - ? 
std::vector{"data", "weights"} - : std::vector{"data"}; + return params.has_weights ? + std::vector{"data", "weights"} : + std::vector{"data"}; }) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) { diff --git a/src/operator/numpy/np_boolean_mask_assign.cc b/src/operator/numpy/np_boolean_mask_assign.cc index 3687a10ed749..4283821ccfb3 100644 --- a/src/operator/numpy/np_boolean_mask_assign.cc +++ b/src/operator/numpy/np_boolean_mask_assign.cc @@ -262,9 +262,9 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs, trailing, inputs[2].dptr()); } else { - bool need_broadcast = (vshape.ndim() == (dshape.ndim() - mshape.ndim() + 1)) - ? (vshape[start_axis] == 1) - : true; + bool need_broadcast = (vshape.ndim() == (dshape.ndim() - mshape.ndim() + 1)) ? + (vshape[start_axis] == 1) : + true; Kernel, cpu>::Launch(s, valid_num, data.dptr(), diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.h b/src/operator/numpy/np_broadcast_reduce_op_value.h index 68b475bf87e0..bf171133509f 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.h +++ b/src/operator/numpy/np_broadcast_reduce_op_value.h @@ -79,9 +79,9 @@ inline void TVMOpReduce(const OpContext& ctx, << "TVMOpReduce only supports ndim <= " << max_reduce_ndim; const TBlob expanded_output = - (input.ndim() == output.ndim() - ? output - : output.reshape(NumpyReduceAxesShapeImpl(input.shape_, axis, true))); + (input.ndim() == output.ndim() ? + output : + output.reshape(NumpyReduceAxesShapeImpl(input.shape_, axis, true))); CHECK_EQ(input.ndim(), expanded_output.ndim()); int reduce1st_dim = 0; if (input.ndim() > 0 && input.size(0) != expanded_output.size(0)) { diff --git a/src/operator/numpy/np_delete_op-inl.h b/src/operator/numpy/np_delete_op-inl.h index 5bb737fa75a7..901b15f204e3 100644 --- a/src/operator/numpy/np_delete_op-inl.h +++ b/src/operator/numpy/np_delete_op-inl.h @@ -281,8 +281,8 @@ void NumpyDeleteCompute(const nnvm::NodeAttrs& attrs, char* is_delete_ptr = nullptr; MSHADOW_TYPE_SWITCH( ((inputs.size() == 2U) ? // obj is tensor - inputs[delete_::kObj].dtype() - : mshadow::DataType::kFlag), + inputs[delete_::kObj].dtype() : + mshadow::DataType::kFlag), IType, { size_t temp_mem_size = sizeof(int64_t) * arr.shape()[axis] + sizeof(IType) * numtodel + @@ -342,8 +342,8 @@ void NumpyDeleteCompute(const nnvm::NodeAttrs& attrs, } MSHADOW_TYPE_SWITCH(((inputs.size() == 2U) ? // obj is tensor - inputs[delete_::kObj].dtype() - : mshadow::DataType::kFlag), + inputs[delete_::kObj].dtype() : + mshadow::DataType::kFlag), IType, { MXNET_NDIM_SWITCH(outshape.ndim(), ndim, { diff --git a/src/operator/numpy/np_delete_op.cc b/src/operator/numpy/np_delete_op.cc index 47026883beb2..36a4c9f6bb57 100644 --- a/src/operator/numpy/np_delete_op.cc +++ b/src/operator/numpy/np_delete_op.cc @@ -81,9 +81,9 @@ NNVM_REGISTER_OP(_npi_delete) const NumpyDeleteParam& params = nnvm::get(attrs.parsed); return (params.step.has_value() || - params.int_ind.has_value()) - ? std::vector{"arr"} - : std::vector{"arr", "obj"}; + params.int_ind.has_value()) ? + std::vector{"arr"} : + std::vector{"arr", "obj"}; }) .set_attr("FInferType", NumpyDeleteType) .set_attr("FComputeEx", NumpyDeleteCompute) diff --git a/src/operator/numpy/np_einsum_op-inl.h b/src/operator/numpy/np_einsum_op-inl.h index 5525b9209fc1..56e6f90b77c6 100644 --- a/src/operator/numpy/np_einsum_op-inl.h +++ b/src/operator/numpy/np_einsum_op-inl.h @@ -436,8 +436,8 @@ struct numpy_einsum { AType sum = 0; do { AType tmp = - back ? 
static_cast(out_grad[dot(oidx, ostride[nop]) + dot(ridx, rstride[nop])]) - : (AType)1; + back ? static_cast(out_grad[dot(oidx, ostride[nop]) + dot(ridx, rstride[nop])]) : + (AType)1; for (int iop = 0; iop < nop; ++iop) { if (iop != iop0) { index_t k = dot(oidx, ostride[iop]) + dot(ridx, rstride[iop]); diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op.h b/src/operator/numpy/np_elemwise_broadcast_logic_op.h index 9d25615757a6..fafee3faedfa 100644 --- a/src/operator/numpy/np_elemwise_broadcast_logic_op.h +++ b/src/operator/numpy/np_elemwise_broadcast_logic_op.h @@ -64,8 +64,8 @@ static constexpr char func_logical_xor_gpu[] = "logical_xor_gpu"; #pragma clang diagnostic pop inline bool NumpyBinaryLogicOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); if (in_attrs->at(0) == -1 && in_attrs->at(1) == -1) @@ -260,17 +260,17 @@ struct GetBinaryBroadcastCompute { #if MXNET_USE_CUDA -#define MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(name) \ - NNVM_REGISTER_OP(_npi_##name) \ - .set_attr("FCompute", BinaryBroadcastRTCCompute{"np_" #name}) +#define MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(name) \ + NNVM_REGISTER_OP(_npi_##name) \ + .set_attr("FCompute", BinaryBroadcastRTCCompute{"np_" #name}) #endif // MXNET_USE_CUDA #endif // MXNET_USE_TVM_OP inline bool NumpyBinaryScalarLogicOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); if (in_attrs->at(0) == -1) @@ -342,7 +342,6 @@ struct TVMBinaryBroadcastScalarCompute { .add_argument("data", "NDArray-or-Symbol", "First input to the function") \ .add_arguments(NumpyBinaryScalarParam::__FIELDS__()) - #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-const-variable" static constexpr char func_equal_scalar_cpu[] = "equal_scalar_cpu"; @@ -393,9 +392,9 @@ static constexpr char func_logical_xor_scalar_gpu[] = "logical_xor_scalar_gpu" #if MXNET_USE_CUDA -#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(name) \ - NNVM_REGISTER_OP(_npi_##name##_scalar) \ - .set_attr("FCompute", BinaryScalarRTCCompute{"np_" #name}) +#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(name) \ + NNVM_REGISTER_OP(_npi_##name##_scalar) \ + .set_attr("FCompute", BinaryScalarRTCCompute{"np_" #name}) #endif // MXNET_USE_CUDA diff --git a/src/operator/numpy/np_elemwise_broadcast_op.h b/src/operator/numpy/np_elemwise_broadcast_op.h index da40fe4044e7..97373d724324 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.h +++ b/src/operator/numpy/np_elemwise_broadcast_op.h @@ -549,8 +549,8 @@ void NumpyBinaryBackwardUseIn(const nnvm::NodeAttrs& attrs, .add_arguments(NumpyBinaryScalarParam::__FIELDS__()) inline bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); const int ltype = in_attrs->at(0); diff --git a/src/operator/numpy/np_elemwise_broadcast_op_add.cc b/src/operator/numpy/np_elemwise_broadcast_op_add.cc index fd7fa3a62e73..50a79ab5dc2f 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_add.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_add.cc @@ -28,26 +28,27 @@ namespace mxnet { namespace op { 
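// Context for the registration below (editorial sketch, not part of the original patch):
// an NNVM binary broadcast operator pairs an FCompute kernel with an FGradient entry that
// defers to a separately registered backward op. A minimal version of the pattern, with
// "_npi_example_add" as a hypothetical operator name:
//
//   NNVM_REGISTER_OP(_npi_example_add)
//       .set_attr<FCompute>("FCompute",
//                           NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::plus>)
//       .set_attr<nnvm::FGradient>("FGradient",
//                                  ElemwiseGradUseIn{"_backward_npi_broadcast_add"});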
MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add) -.set_attr( - "FCompute", - NumpyBinaryBroadcastComputeWithBool) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_add"}); + .set_attr("FCompute", + NumpyBinaryBroadcastComputeWithBool) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_add"}); NNVM_REGISTER_OP(_backward_npi_broadcast_add) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr("TIsBackward", true) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs){ - return std::vector >{{0, 0}, {0, 1}}; - }) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FCompute", NumpyBinaryBackwardUseIn); + .set_num_inputs(3) + .set_num_outputs(2) + .set_attr("TIsBackward", true) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}, {0, 1}}; + }) + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) + .set_attr("FCompute", + NumpyBinaryBackwardUseIn); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_add.cu b/src/operator/numpy/np_elemwise_broadcast_op_add.cu index ad8cc6053c40..43802971ed36 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_add.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_add.cu @@ -27,11 +27,10 @@ namespace mxnet { namespace op { -NNVM_REGISTER_OP(_npi_add) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"add"}); +NNVM_REGISTER_OP(_npi_add).set_attr("FCompute", BinaryBroadcastRTCCompute{"add"}); NNVM_REGISTER_OP(_backward_npi_broadcast_add) -.set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"one", "one"}); + .set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"one", "one"}); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mod.cc b/src/operator/numpy/np_elemwise_broadcast_op_mod.cc index 0dfe0999a3ed..e47a2f2bc96f 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_mod.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_mod.cc @@ -28,26 +28,27 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_mod) -.set_attr( - "FCompute", - NumpyBinaryBroadcastCompute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mod"}); + .set_attr("FCompute", + NumpyBinaryBroadcastCompute) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mod"}); NNVM_REGISTER_OP(_backward_npi_broadcast_mod) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr("TIsBackward", true) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs){ - return std::vector >{{0, 1}}; - }) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FCompute", NumpyBinaryBackwardUseIn); + .set_num_inputs(3) + .set_num_outputs(2) + .set_attr("TIsBackward", true) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 1}}; + }) + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) + .set_attr("FCompute", + NumpyBinaryBackwardUseIn); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mod.cu b/src/operator/numpy/np_elemwise_broadcast_op_mod.cu index 642b2f5ccc7c..20ca4e311ba7 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_mod.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_mod.cu @@ -27,11 +27,10 @@ 
namespace mxnet { namespace op { -NNVM_REGISTER_OP(_npi_mod) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"mod"}); +NNVM_REGISTER_OP(_npi_mod).set_attr("FCompute", BinaryBroadcastRTCCompute{"mod"}); NNVM_REGISTER_OP(_backward_npi_broadcast_mod) -.set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"mod_grad", "mod_rgrad"}); + .set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"mod_grad", "mod_rgrad"}); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mul.cc b/src/operator/numpy/np_elemwise_broadcast_op_mul.cc index c5180e41faee..3e627c8c7e10 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_mul.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_mul.cc @@ -28,26 +28,27 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply) -.set_attr( - "FCompute", - NumpyBinaryBroadcastComputeWithBool) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mul"}); + .set_attr("FCompute", + NumpyBinaryBroadcastComputeWithBool) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mul"}); NNVM_REGISTER_OP(_backward_npi_broadcast_mul) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr("TIsBackward", true) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs){ - return std::vector >{{0, 1}}; - }) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FCompute", NumpyBinaryBackwardUseIn); + .set_num_inputs(3) + .set_num_outputs(2) + .set_attr("TIsBackward", true) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 1}}; + }) + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) + .set_attr("FCompute", + NumpyBinaryBackwardUseIn); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mul.cu b/src/operator/numpy/np_elemwise_broadcast_op_mul.cu index c720b79f4c0d..882855ddc264 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_mul.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_mul.cu @@ -28,10 +28,10 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(_npi_multiply) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"mul"}); + .set_attr("FCompute", BinaryBroadcastRTCCompute{"mul"}); NNVM_REGISTER_OP(_backward_npi_broadcast_mul) -.set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"right", "left"}); + .set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"right", "left"}); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_pow.cc b/src/operator/numpy/np_elemwise_broadcast_op_pow.cc index c281d125a45c..aa5f4c4dbb5d 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_pow.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_pow.cc @@ -28,26 +28,28 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_power) -.set_attr( - "FCompute", - NumpyBinaryBroadcastComputeWithBool) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_power"}); + .set_attr("FCompute", + NumpyBinaryBroadcastComputeWithBool) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_power"}); NNVM_REGISTER_OP(_backward_npi_broadcast_power) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr("TIsBackward", true) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs){ - return std::vector >{{0, 1}}; - }) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return 
std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FCompute", NumpyBinaryBackwardUseIn); + .set_num_inputs(3) + .set_num_outputs(2) + .set_attr("TIsBackward", true) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 1}}; + }) + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) + .set_attr( + "FCompute", + NumpyBinaryBackwardUseIn); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_pow.cu b/src/operator/numpy/np_elemwise_broadcast_op_pow.cu index 3a78ba6fd8d7..9e79578a9413 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_pow.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_pow.cu @@ -28,10 +28,11 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(_npi_power) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"power"}); + .set_attr("FCompute", BinaryBroadcastRTCCompute{"power"}); NNVM_REGISTER_OP(_backward_npi_broadcast_power) -.set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"power_grad", "power_rgrad"}); + .set_attr("FCompute", + BinaryBroadcastRTCBackwardUseIn{"power_grad", "power_rgrad"}); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc index 4fd1f2c84070..e4e61d12262a 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc @@ -30,36 +30,36 @@ namespace op { DMLC_REGISTER_PARAMETER(NumpyBinaryScalarParam); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_add_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_subtract_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rsubtract_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseNone{"negative"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_multiply_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseNone{"_backward_mul_scalar"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseNone{"_backward_mul_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_mod_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mod_scalar"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_mod_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rmod_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_rmod_scalar"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_rmod_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_power_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_power_scalar"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", 
ElemwiseGradUseIn{"_backward_power_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"}); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_floor_divide_scalar) .set_attr("FCompute", BinaryScalarOp::Compute) diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu index c7bbeefb4445..21a8aeddf41d 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu @@ -28,28 +28,28 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(_npi_add_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"add"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"add"}); NNVM_REGISTER_OP(_npi_subtract_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"sub"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"sub"}); NNVM_REGISTER_OP(_npi_rsubtract_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"rsub"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"rsub"}); NNVM_REGISTER_OP(_npi_multiply_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"mul"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"mul"}); NNVM_REGISTER_OP(_npi_mod_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"mod"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"mod"}); NNVM_REGISTER_OP(_npi_rmod_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"rmod"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"rmod"}); NNVM_REGISTER_OP(_npi_power_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"power"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"power"}); NNVM_REGISTER_OP(_npi_rpower_scalar) -.set_attr("FCompute", BinaryScalarRTCCompute{"rpow"}); + .set_attr("FCompute", BinaryScalarRTCCompute{"rpow"}); NNVM_REGISTER_OP(_npi_floor_divide_scalar) .set_attr("FCompute", BinaryScalarRTCCompute{"floor_divide"}); diff --git a/src/operator/numpy/np_elemwise_broadcast_op_sub.cc b/src/operator/numpy/np_elemwise_broadcast_op_sub.cc index ff6501d3d413..5f3ba7653549 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_sub.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_sub.cc @@ -28,26 +28,27 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract) -.set_attr( - "FCompute", - NumpyBinaryBroadcastCompute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_sub"}); + .set_attr("FCompute", + NumpyBinaryBroadcastCompute) + .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_sub"}); NNVM_REGISTER_OP(_backward_npi_broadcast_sub) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr("TIsBackward", true) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs){ - return std::vector >{{0, 0}, {0, 1}}; - }) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FCompute", NumpyBinaryBackwardUseIn); + .set_num_inputs(3) + .set_num_outputs(2) + .set_attr("TIsBackward", true) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}, {0, 1}}; + }) + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) + .set_attr("FCompute", + NumpyBinaryBackwardUseIn); } // namespace op } // namespace mxnet diff --git 
a/src/operator/numpy/np_elemwise_broadcast_op_sub.cu b/src/operator/numpy/np_elemwise_broadcast_op_sub.cu index 2709dc3eec09..943e8fd96683 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_sub.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op_sub.cu @@ -28,10 +28,10 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(_npi_subtract) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"sub"}); + .set_attr("FCompute", BinaryBroadcastRTCCompute{"sub"}); NNVM_REGISTER_OP(_backward_npi_broadcast_sub) -.set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"one", "negone"}); + .set_attr("FCompute", BinaryBroadcastRTCBackwardUseIn{"one", "negone"}); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_insert_op_scalar-inl.h b/src/operator/numpy/np_insert_op_scalar-inl.h index 7a9b8952682a..21ae59bf362d 100644 --- a/src/operator/numpy/np_insert_op_scalar-inl.h +++ b/src/operator/numpy/np_insert_op_scalar-inl.h @@ -56,9 +56,9 @@ void NumpyInsertScalarCompute(const nnvm::NodeAttrs& attrs, int axis = param.axis.has_value() ? param.axis.value() : 0; TBlob arr; TBlob values = - param.val.has_value() - ? TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) - : inputs[val_pos]; + param.val.has_value() ? + TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) : + inputs[val_pos]; if (!param.axis.has_value()) { arr = inputs[arr_pos].reshape(Shape1(inputs[arr_pos].shape_.Size())); ndim = 1; diff --git a/src/operator/numpy/np_insert_op_slice-inl.h b/src/operator/numpy/np_insert_op_slice-inl.h index 92768c3754d8..7c1ade35e6b3 100644 --- a/src/operator/numpy/np_insert_op_slice-inl.h +++ b/src/operator/numpy/np_insert_op_slice-inl.h @@ -55,9 +55,9 @@ void NumpyInsertSliceCompute(const nnvm::NodeAttrs& attrs, int axis = param.axis.has_value() ? param.axis.value() : 0; TBlob arr; TBlob values = - param.val.has_value() - ? TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) - : inputs[val_pos]; + param.val.has_value() ? + TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) : + inputs[val_pos]; if (!param.axis.has_value()) { arr = inputs[arr_pos].reshape(Shape1(inputs[arr_pos].shape_.Size())); ndim = 1; diff --git a/src/operator/numpy/np_insert_op_tensor-inl.h b/src/operator/numpy/np_insert_op_tensor-inl.h index cb5fdce88134..594e135dd336 100644 --- a/src/operator/numpy/np_insert_op_tensor-inl.h +++ b/src/operator/numpy/np_insert_op_tensor-inl.h @@ -65,9 +65,9 @@ void NumpyInsertTensorCompute(const nnvm::NodeAttrs& attrs, int axis = param.axis.has_value() ? param.axis.value() : 0; TBlob arr; TBlob values = - param.val.has_value() - ? TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) - : inputs[val_pos]; + param.val.has_value() ? + TBlob(nullptr, mxnet::TShape(0, 1), xpu::kDevMask, outputs[out_pos].type_flag_) : + inputs[val_pos]; if (!param.axis.has_value()) { arr = inputs[arr_pos].reshape(Shape1(inputs[arr_pos].shape_.Size())); ndim = 1; diff --git a/src/operator/numpy/np_interp_op.cc b/src/operator/numpy/np_interp_op.cc index a5d60b76194d..525460276419 100644 --- a/src/operator/numpy/np_interp_op.cc +++ b/src/operator/numpy/np_interp_op.cc @@ -68,9 +68,9 @@ NNVM_REGISTER_OP(_npi_interp) [](const NodeAttrs& attrs) { const NumpyInterpParam& param = nnvm::get(attrs.parsed); - return param.x_is_scalar - ? std::vector{"xp", "fp"} - : std::vector{"xp", "fp", "x"}; + return param.x_is_scalar ? 
+ std::vector{"xp", "fp"} : + std::vector{"xp", "fp", "x"}; }) .set_attr("FCompute", NumpyInterpForward) .set_attr("FResourceRequest", diff --git a/src/operator/numpy/np_moments_op.cc b/src/operator/numpy/np_moments_op.cc index a6b5cce67fd2..773f2e166465 100644 --- a/src/operator/numpy/np_moments_op.cc +++ b/src/operator/numpy/np_moments_op.cc @@ -157,9 +157,9 @@ NNVM_REGISTER_OP(_npi_average) [](const NodeAttrs& attrs) { const auto& param = nnvm::get(attrs.parsed); - return param.weighted - ? std::vector{"a", "weights"} - : std::vector{"a"}; + return param.weighted ? + std::vector{"a", "weights"} : + std::vector{"a"}; }) .add_argument("a", "NDArray-or-Symbol", "The input") .add_argument("weights", "NDArray-or-Symbol", "The weights to calculate average") diff --git a/src/operator/numpy/np_percentile_op.cc b/src/operator/numpy/np_percentile_op.cc index a15f17602ffc..57164dbcacc3 100644 --- a/src/operator/numpy/np_percentile_op.cc +++ b/src/operator/numpy/np_percentile_op.cc @@ -95,9 +95,9 @@ NNVM_REGISTER_OP(_npi_percentile) [](const NodeAttrs& attrs) { const NumpyPercentileParam& param = nnvm::get(attrs.parsed); - return param.q_scalar.has_value() - ? std::vector{"a"} - : std::vector{"a", "q"}; + return param.q_scalar.has_value() ? + std::vector{"a"} : + std::vector{"a", "q"}; }) .set_attr("FCompute", NumpyPercentileForward) .set_attr("FResourceRequest", diff --git a/src/operator/numpy/np_true_divide.cc b/src/operator/numpy/np_true_divide.cc index 13fb72ca970a..9696f3f3ec46 100644 --- a/src/operator/numpy/np_true_divide.cc +++ b/src/operator/numpy/np_true_divide.cc @@ -54,9 +54,9 @@ bool TrueDivideType(const nnvm::NodeAttrs& attrs, const int lhs_dtype = in_attrs->at(0); const int rhs_dtype = - (num_inputs == 2) - ? in_attrs->at(1) - : (common::is_float(lhs_dtype) ? lhs_dtype : mxnet::common::GetDefaultDtype()); + (num_inputs == 2) ? + in_attrs->at(1) : + (common::is_float(lhs_dtype) ? lhs_dtype : mxnet::common::GetDefaultDtype()); TYPE_ASSIGN_CHECK(*out_attrs, 0, TrueDivideOutType(lhs_dtype, rhs_dtype)); return true; } diff --git a/src/operator/numpy/np_unique_op.cc b/src/operator/numpy/np_unique_op.cc index 0c4e7fceebe8..9c82122afab4 100644 --- a/src/operator/numpy/np_unique_op.cc +++ b/src/operator/numpy/np_unique_op.cc @@ -86,9 +86,10 @@ struct UniqueComputeMaskCPUKernel { out_data[i] = 1; } else { out_data[i] = - (std::memcmp(in_data + i * numel, in_data + (i - 1) * numel, numel * sizeof(DType)) == 0) - ? 0 - : 1; + (std::memcmp(in_data + i * numel, in_data + (i - 1) * numel, numel * sizeof(DType)) == + 0) ? + 0 : + 1; } } }; diff --git a/src/operator/numpy/random/np_bernoulli_op.cc b/src/operator/numpy/random/np_bernoulli_op.cc index 4d3546d53c69..fafd9170b2cb 100644 --- a/src/operator/numpy/random/np_bernoulli_op.cc +++ b/src/operator/numpy/random/np_bernoulli_op.cc @@ -48,9 +48,9 @@ NNVM_REGISTER_OP(_npi_bernoulli) if (param.logit.has_value() || param.prob.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? 
+ std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", TwoparamsDistOpShape) diff --git a/src/operator/numpy/random/np_exponential_op.cc b/src/operator/numpy/random/np_exponential_op.cc index 3d37ce5dcfaf..920cbfecffbc 100644 --- a/src/operator/numpy/random/np_exponential_op.cc +++ b/src/operator/numpy/random/np_exponential_op.cc @@ -51,9 +51,9 @@ NNVM_REGISTER_OP(_npi_exponential) if (param.scale.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? + std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", TwoparamsDistOpShape) diff --git a/src/operator/numpy/random/np_pareto_op.cc b/src/operator/numpy/random/np_pareto_op.cc index e0c7650b7ddf..f0c7a8d7dc17 100644 --- a/src/operator/numpy/random/np_pareto_op.cc +++ b/src/operator/numpy/random/np_pareto_op.cc @@ -51,9 +51,9 @@ NNVM_REGISTER_OP(_npi_pareto) if (param.a.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? + std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", TwoparamsDistOpShape) diff --git a/src/operator/numpy/random/np_power_op.cc b/src/operator/numpy/random/np_power_op.cc index 0376aa9d9f4c..336ae1502bc8 100644 --- a/src/operator/numpy/random/np_power_op.cc +++ b/src/operator/numpy/random/np_power_op.cc @@ -48,9 +48,9 @@ NNVM_REGISTER_OP(_npi_powerd) if (param.a.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? + std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", UnaryDistOpShape) diff --git a/src/operator/numpy/random/np_rayleigh_op.cc b/src/operator/numpy/random/np_rayleigh_op.cc index 0b0085af9cd5..37cbd11f87ea 100644 --- a/src/operator/numpy/random/np_rayleigh_op.cc +++ b/src/operator/numpy/random/np_rayleigh_op.cc @@ -51,9 +51,9 @@ NNVM_REGISTER_OP(_npi_rayleigh) if (param.scale.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? + std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", TwoparamsDistOpShape) diff --git a/src/operator/numpy/random/np_weibull_op.cc b/src/operator/numpy/random/np_weibull_op.cc index 6e02114040b9..f1c490a2a8c4 100644 --- a/src/operator/numpy/random/np_weibull_op.cc +++ b/src/operator/numpy/random/np_weibull_op.cc @@ -51,9 +51,9 @@ NNVM_REGISTER_OP(_npi_weibull) if (param.a.has_value()) { num_inputs -= 1; } - return (num_inputs == 0) - ? std::vector() - : std::vector{"input1"}; + return (num_inputs == 0) ? + std::vector() : + std::vector{"input1"}; }) .set_attr_parser(ParamParser) .set_attr("FInferShape", TwoparamsDistOpShape) diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h index 122ae8a076c0..5cc23364c0db 100644 --- a/src/operator/optimizer_op-inl.h +++ b/src/operator/optimizer_op-inl.h @@ -1272,9 +1272,9 @@ struct FTMLKernel { const DType clip_grad, const OpReqType req) { using namespace mshadow_op; - const DType grad_i = clip_grad >= 0.0f - ? clip::Map(rescale_grad * grad[i], clip_grad) + wd * weight[i] - : (rescale_grad * grad[i] + wd * weight[i]); + const DType grad_i = clip_grad >= 0.0f ? 
+ clip::Map(rescale_grad * grad[i], clip_grad) + wd * weight[i] : + (rescale_grad * grad[i] + wd * weight[i]); v[i] = beta2 * v[i] + (1 - beta2) * square::Map(grad_i); const DType d_t = (1 - power::Map(beta1, t)) / lr * (square_root::Map(v[i] / (1 - power::Map(beta2, t))) + epsilon); diff --git a/src/operator/optimizer_op.cc b/src/operator/optimizer_op.cc index 89b50aa61e15..c3fd47dadd17 100644 --- a/src/operator/optimizer_op.cc +++ b/src/operator/optimizer_op.cc @@ -228,8 +228,8 @@ struct AdamStdDnsRspDnsKernel { const RType grad_i = (prefix_sum[i] - 1) * row_length; for (index_t j = 0; j < row_length; j++) { const index_t data_i = row_i + j; - DType grad_rescaled = non_zero ? static_cast(grad_data[grad_i + j] * rescale_grad) - : static_cast(0); + DType grad_rescaled = non_zero ? static_cast(grad_data[grad_i + j] * rescale_grad) : + static_cast(0); if (clip_gradient >= 0.0f) { grad_rescaled = clip::Map(grad_rescaled, clip_gradient); } diff --git a/src/operator/optimizer_op.cu b/src/operator/optimizer_op.cu index f70e9fdd67cb..4c75eb0c72fc 100644 --- a/src/operator/optimizer_op.cu +++ b/src/operator/optimizer_op.cu @@ -163,8 +163,8 @@ struct AdamStdDnsRspDnsKernel { const bool non_zero = (row_id == 0) ? prefix_sum[0] > 0 : prefix_sum[row_id] > prefix_sum[row_id - 1]; const RType grad_offset = (prefix_sum[row_id] - 1) * row_length + col_id; - DType grad_rescaled = non_zero ? static_cast(grad_data[grad_offset] * rescale_grad) - : static_cast(0); + DType grad_rescaled = non_zero ? static_cast(grad_data[grad_offset] * rescale_grad) : + static_cast(0); if (clip_gradient >= 0.0f) { grad_rescaled = clip::Map(grad_rescaled, clip_gradient); } diff --git a/src/operator/random/sampler.h b/src/operator/random/sampler.h index 7ed5529dc62e..296833c93999 100644 --- a/src/operator/random/sampler.h +++ b/src/operator/random/sampler.h @@ -370,10 +370,10 @@ struct SampleGeneralizedNegativeBinomialKernel { RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { index_t nBatch(1 + (nSample - 1) / nParm); float lambda = - alpha[i / nBatch] == 0 - ? static_cast(mu[i / nBatch]) - : SampleGamma( - IType(1) / alpha[i / nBatch], alpha[i / nBatch] * mu[i / nBatch], &genImpl); + alpha[i / nBatch] == 0 ? + static_cast(mu[i / nBatch]) : + SampleGamma( + IType(1) / alpha[i / nBatch], alpha[i / nBatch] * mu[i / nBatch], &genImpl); out[i] = OType(SamplePoisson(lambda, &genImpl)); }); } diff --git a/src/operator/random/shuffle_op.cu b/src/operator/random/shuffle_op.cu index b66943e456bc..33e1ec28f9fd 100644 --- a/src/operator/random/shuffle_op.cu +++ b/src/operator/random/shuffle_op.cu @@ -76,8 +76,8 @@ void ShuffleForwardGPU(const nnvm::NodeAttrs& attrs, SortByKey(keys, out, true); } else { const size_t tmp_space_size = - req[0] == kWriteInplace ? 2 * first_axis_len * sizeof(index_t) + size * sizeof(DType) - : 2 * first_axis_len * sizeof(index_t); + req[0] == kWriteInplace ? 2 * first_axis_len * sizeof(index_t) + size * sizeof(DType) : + 2 * first_axis_len * sizeof(index_t); Tensor tmp_space = ctx.requested[1].get_space_typed(Shape1(tmp_space_size), s); char* tmp_space_ptr = tmp_space.dptr_; diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index b6cfc79e1122..c37a65f31ecc 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -181,9 +181,9 @@ class SequenceLastOp : public Operator { Tensor out = out_data[seq_last::kOut].get_with_shape(Shape2(batch, rest_size), s); Tensor indices = - param_.use_sequence_length - ? 
in_data[seq_last::kSequenceLength].get(s) - : ctx.requested[seq_last::kTempSpace].get_space_typed(Shape1(batch), s); + param_.use_sequence_length ? + in_data[seq_last::kSequenceLength].get(s) : + ctx.requested[seq_last::kTempSpace].get_space_typed(Shape1(batch), s); if (!param_.use_sequence_length) indices = max_seq_len; @@ -223,9 +223,9 @@ class SequenceLastOp : public Operator { Tensor output_grad = out_grad[seq_last::kOut].get_with_shape(Shape2(batch, rest_size), s); Tensor indices = - param_.use_sequence_length - ? in_data[seq_last::kSequenceLength].get(s) - : ctx.requested[seq_last::kTempSpace].get_space_typed(Shape1(batch), s); + param_.use_sequence_length ? + in_data[seq_last::kSequenceLength].get(s) : + ctx.requested[seq_last::kTempSpace].get_space_typed(Shape1(batch), s); if (req[seq_last::kData] == kWriteTo) data_grad = 0.0f; diff --git a/src/operator/subgraph/build_subgraph.cc b/src/operator/subgraph/build_subgraph.cc index 9af7f49178e1..ef1218b49df0 100644 --- a/src/operator/subgraph/build_subgraph.cc +++ b/src/operator/subgraph/build_subgraph.cc @@ -853,9 +853,9 @@ nnvm::Graph BuildSubgraph(nnvm::Graph&& g) { const SubgraphPropertyPtr& subg_prop = g.GetAttr("subgraph_property"); if (verbose > 1) { - const std::string& prop_name = subg_prop->HasAttr("property_name") - ? subg_prop->GetAttr("property_name") - : "partition graph"; + const std::string& prop_name = subg_prop->HasAttr("property_name") ? + subg_prop->GetAttr("property_name") : + "partition graph"; LOG(INFO) << "start to execute " << prop_name << "."; } // top sort NodeEntry of all the nodes' inputs diff --git a/src/operator/subgraph/dnnl/dnnl_conv.cc b/src/operator/subgraph/dnnl/dnnl_conv.cc index f85ece31e450..e9fab47e6f44 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv.cc +++ b/src/operator/subgraph/dnnl/dnnl_conv.cc @@ -414,9 +414,10 @@ static uint32_t SgDNNLConvNumInputs(const NodeAttrs& attrs) { auto num_input = DefaultSubgraphOpNumInputs(attrs); if (param.full_conv_param.dnnl_param.quantized) return num_input + 2 + - (param.full_conv_param.dnnl_param.with_sum && !param.full_conv_param.dnnl_param.dedup_sum - ? 2 - : 0); + (param.full_conv_param.dnnl_param.with_sum && + !param.full_conv_param.dnnl_param.dedup_sum ? + 2 : + 0); else return num_input; } @@ -468,10 +469,10 @@ static void SgDNNLConvParamParser(nnvm::NodeAttrs* attrs) { } else if (node_name == "Convolution") { param_.full_conv_param.conv_param = nnvm::get(node->attrs.parsed); } else if (node_name == "Activation" || node_name == "LeakyReLU" || node_name == "clip") { - auto& post_act_param = (param_.full_conv_param.dnnl_param.with_act && !with_act) - ? param_.full_conv_param.act_param - : param_.full_conv_param.postsum_act_param; - with_act = true; + auto& post_act_param = (param_.full_conv_param.dnnl_param.with_act && !with_act) ? + param_.full_conv_param.act_param : + param_.full_conv_param.postsum_act_param; + with_act = true; if (node_name == "Activation") { const auto act_param = nnvm::get(node->attrs.parsed); post_act_param.alg = GetDNNLActAlgo(act_param); diff --git a/src/operator/subgraph/dnnl/dnnl_fc.cc b/src/operator/subgraph/dnnl/dnnl_fc.cc index c07b8f7b8835..44c1a3585156 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc.cc +++ b/src/operator/subgraph/dnnl/dnnl_fc.cc @@ -670,8 +670,8 @@ NNVM_REGISTER_OP(_sg_onednn_fully_connected) }) .set_num_outputs([](const NodeAttrs& attrs) { auto const& full_param = nnvm::get(attrs.parsed); - return (full_param.dnnl_param.quantized && !full_param.dnnl_param.enable_float_output) ? 
3 - : 1; + return (full_param.dnnl_param.quantized && !full_param.dnnl_param.enable_float_output) ? 3 : + 1; }) .set_attr_parser(SgDNNLFCParamParser) .set_attr("FListInputNames", SgDNNLFCListInputNames) diff --git a/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc b/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc index 5db3bb01df8a..23131cb9792c 100644 --- a/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc +++ b/src/operator/subgraph/tensorrt/nnvm_to_onnx.cc @@ -668,7 +668,7 @@ void ConvertConcatenate(GraphProto* graph_proto, const array_view& inputs) { NodeProto* node_proto = graph_proto->add_node(); node_proto->set_name(node_name); - const auto& _param = nnvm::get(attrs.parsed); + const auto& _param = nnvm::get(attrs.parsed); const int param_dim = _param.dim.has_value() ? _param.dim.value() : 0; node_proto->set_op_type("Concat"); node_proto->set_name(attrs.name); diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h index c2b1dd215937..c145273076b2 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h @@ -73,11 +73,11 @@ class TRT_Logger : public nvinfer1::ILogger { time_t rawtime = std::time(0); char buf[256]; strftime(&buf[0], 256, "%Y-%m-%d %H:%M:%S", std::gmtime(&rawtime)); - const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" - : severity == Severity::kERROR ? " ERROR" - : severity == Severity::kWARNING ? "WARNING" - : severity == Severity::kINFO ? " INFO" - : "UNKNOWN"); + const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : + severity == Severity::kERROR ? " ERROR" : + severity == Severity::kWARNING ? "WARNING" : + severity == Severity::kINFO ? " INFO" : + "UNKNOWN"); (*_ostream) << "[" << buf << " " << sevstr << "] " << msg << std::endl; } } diff --git a/src/operator/subgraph/tensorrt/tensorrt-inl.h b/src/operator/subgraph/tensorrt/tensorrt-inl.h index d142dc1ed358..ccfb150c838c 100644 --- a/src/operator/subgraph/tensorrt/tensorrt-inl.h +++ b/src/operator/subgraph/tensorrt/tensorrt-inl.h @@ -192,7 +192,7 @@ class TensorrtSelector : public SubgraphSelector { } if (op_name == "Concat") { - const auto& param = nnvm::get(n.attrs.parsed); + const auto& param = nnvm::get(n.attrs.parsed); const int param_dim = param.dim.has_value() ? 
param.dim.value() : 0; return (param_dim != 0); } diff --git a/src/operator/tensor/amp_cast.cc b/src/operator/tensor/amp_cast.cc index aee5f537d9bc..62e63a183e5a 100644 --- a/src/operator/tensor/amp_cast.cc +++ b/src/operator/tensor/amp_cast.cc @@ -46,9 +46,9 @@ static void AMPCastExCPU(const nnvm::NodeAttrs& attrs, dnnl::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine(); if (data.IsView() && data.IsDNNLData()) data = data.Reorder2Default(); - const auto i_mem = data.GetDNNLData(); - const size_t i_ndim = data.shape().ndim(); - dnnl::memory::dims i_dims = dnnl::memory::dims(i_ndim); + const auto i_mem = data.GetDNNLData(); + const size_t i_ndim = data.shape().ndim(); + dnnl::memory::dims i_dims = dnnl::memory::dims(i_ndim); for (size_t i = 0; i < i_ndim; i++) { i_dims[i] = static_cast(data.shape()[i]); } @@ -94,9 +94,9 @@ static void AMPMultiCastExCPU(const nnvm::NodeAttrs& attrs, auto data = inputs[i]; if (data.IsView() && data.IsDNNLData()) data = data.Reorder2Default(); - const auto i_mem = data.GetDNNLData(); - const size_t i_ndim = data.shape().ndim(); - dnnl::memory::dims i_dims = dnnl::memory::dims(i_ndim); + const auto i_mem = data.GetDNNLData(); + const size_t i_ndim = data.shape().ndim(); + dnnl::memory::dims i_dims = dnnl::memory::dims(i_ndim); for (size_t j = 0; j < i_ndim; j++) { i_dims[j] = static_cast(data.shape()[j]); } diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 30f52f126166..77a81bcb646e 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -795,9 +795,9 @@ struct ReduceImplConfig { kernel_1.gridDim.x = std::min((unsigned int)kBaseGridNum, ceil_idiv(N, kernel_1.blockDim.x)); kernel_1.gridDim.y = std::min(kBaseGridNum, Mnext); - kernel_1.shMemSize = (kernel_1.blockDim.y > 1) - ? kernel_1.blockDim.x * kernel_1.blockDim.y * max_type_size * 2 - : 0; + kernel_1.shMemSize = (kernel_1.blockDim.y > 1) ? + kernel_1.blockDim.x * kernel_1.blockDim.y * max_type_size * 2 : + 0; // Maximum number of times we want TB to loop in M // Max size of M-block each TB can handle int maxMblock = kernel_1.blockDim.y * maxLoopPerTB; diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h index 863ef28598ec..7cd9fa9988d8 100644 --- a/src/operator/tensor/dot-inl.h +++ b/src/operator/tensor/dot-inl.h @@ -251,8 +251,8 @@ inline bool DotForwardInferStorageType(const nnvm::NodeAttrs& attrs, bool rhs_rsp_or_dns = rhs_stype == kRowSparseStorage || rhs_stype == kDefaultStorage; bool hint_has_value = param.forward_stype.has_value(); NDArrayStorageType target_stype = - hint_has_value ? static_cast(param.forward_stype.value()) - : kUndefinedStorage; + hint_has_value ? static_cast(param.forward_stype.value()) : + kUndefinedStorage; if (!dispatched && lhs_stype == kDefaultStorage && rhs_stype == kDefaultStorage) { // dns, dns -> dns target_stype = hint_has_value ? target_stype : kDefaultStorage; @@ -1341,13 +1341,13 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs, L[0] = mshadow::Shape1(lshape[0]); L[1] = lshape.ndim() > 1 ? mxnet::TShape(&lshape[1], lshape.end()) : mxnet::TShape(1, 1); } else { - L[0] = lshape.ndim() > 1 ? mxnet::TShape(&lshape[0], &lshape[lshape.ndim() - 1]) - : mxnet::TShape(1, 1); + L[0] = lshape.ndim() > 1 ? mxnet::TShape(&lshape[0], &lshape[lshape.ndim() - 1]) : + mxnet::TShape(1, 1); L[1] = mshadow::Shape1(lshape[lshape.ndim() - 1]); } if (Tb) { - R[0] = rshape.ndim() > 1 ? 
mxnet::TShape(&rshape[0], &rshape[rshape.ndim() - 1]) - : mxnet::TShape(1, 1); + R[0] = rshape.ndim() > 1 ? mxnet::TShape(&rshape[0], &rshape[rshape.ndim() - 1]) : + mxnet::TShape(1, 1); R[1] = mshadow::Shape1(rshape[rshape.ndim() - 1]); } else { R[0] = mshadow::Shape1(rshape[0]); diff --git a/src/operator/tensor/elemwise_binary_op-inl.h b/src/operator/tensor/elemwise_binary_op-inl.h index 9d8b43adb2af..b2d8394d71de 100644 --- a/src/operator/tensor/elemwise_binary_op-inl.h +++ b/src/operator/tensor/elemwise_binary_op-inl.h @@ -113,14 +113,14 @@ void ElemwiseBinaryOp::RspRspOp(mshadow::Stream* s, // Indices const Tensor indices_l = - lhs_is_dense ? Tensor() - : lhs.aux_data(rowsparse::kIdx).FlatTo1D(s); + lhs_is_dense ? Tensor() : + lhs.aux_data(rowsparse::kIdx).FlatTo1D(s); const Tensor indices_r = - rhs_is_dense ? Tensor() - : rhs.aux_data(rowsparse::kIdx).FlatTo1D(s); + rhs_is_dense ? Tensor() : + rhs.aux_data(rowsparse::kIdx).FlatTo1D(s); Tensor indices_out = - is_dense_result ? Tensor() - : output.aux_data(rowsparse::kIdx).FlatTo1D(s); + is_dense_result ? Tensor() : + output.aux_data(rowsparse::kIdx).FlatTo1D(s); // Data // TODO(cjolivier01): Change to get_with_shape() calls @@ -565,8 +565,8 @@ struct ElemwiseDnsCsrCsrKernel { for (int j = csr_indptr[i]; j < csr_indptr[i + 1]; ++j) { KERNEL_ASSIGN(out[j], req, - reverse ? OP::Map(dns_data[i * num_cols + csr_indices[j]], csr_data[j]) - : OP::Map(csr_data[j], dns_data[i * num_cols + csr_indices[j]])); + reverse ? OP::Map(dns_data[i * num_cols + csr_indices[j]], csr_data[j]) : + OP::Map(csr_data[j], dns_data[i * num_cols + csr_indices[j]])); } } } diff --git a/src/operator/tensor/elemwise_binary_scalar_op.h b/src/operator/tensor/elemwise_binary_scalar_op.h index 1fb241b24750..aa6b7f531f69 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op.h +++ b/src/operator/tensor/elemwise_binary_scalar_op.h @@ -195,8 +195,8 @@ class BinaryScalarOp : public UnaryOp { // Split up into blocks of contiguous data and do those together const size_t row_item_start_iter = row_starts_ptr[i]; const size_t input_items_this_row = - !last_row ? static_cast(row_starts_ptr[i + 1]) - row_item_start_iter - : item_count - row_item_start_iter; + !last_row ? static_cast(row_starts_ptr[i + 1]) - row_item_start_iter : + item_count - row_item_start_iter; if (input_items_this_row) { const IType* this_row_column_indexes = column_indexes_ptr + row_item_start_iter; const DType* row_data_start = in + row_item_start_iter; diff --git a/src/operator/tensor/histogram.cc b/src/operator/tensor/histogram.cc index faa709c76e0d..d36e9e50faf0 100644 --- a/src/operator/tensor/histogram.cc +++ b/src/operator/tensor/histogram.cc @@ -161,9 +161,9 @@ Example:: [](const NodeAttrs& attrs) { const HistogramParam& params = nnvm::get(attrs.parsed); - return params.bin_cnt.has_value() - ? std::vector{"data"} - : std::vector{"data", "bins"}; + return params.bin_cnt.has_value() ? + std::vector{"data"} : + std::vector{"data", "bins"}; }) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) { diff --git a/src/operator/tensor/la_op-inl.h b/src/operator/tensor/la_op-inl.h index 212d630bc016..49a3ff263ca8 100644 --- a/src/operator/tensor/la_op-inl.h +++ b/src/operator/tensor/la_op-inl.h @@ -674,10 +674,10 @@ struct gemm_backward { const nnvm::NodeAttrs& attrs) { const LaMatrixMacParam& param = nnvm::get(attrs.parsed); bool tA(param.transpose_a), tB(param.transpose_b); - (tA ? 
gemm::op(B, dD, dA, DType(param.alpha), DType(0), tB, true, s) - : gemm::op(dD, B, dA, DType(param.alpha), DType(0), false, !tB, s)); - (tB ? gemm::op(dD, A, dB, DType(param.alpha), DType(0), true, tA, s) - : gemm::op(A, dD, dB, DType(param.alpha), DType(0), !tA, false, s)); + (tA ? gemm::op(B, dD, dA, DType(param.alpha), DType(0), tB, true, s) : + gemm::op(dD, B, dA, DType(param.alpha), DType(0), false, !tB, s)); + (tB ? gemm::op(dD, A, dB, DType(param.alpha), DType(0), true, tA, s) : + gemm::op(A, dD, dB, DType(param.alpha), DType(0), !tA, false, s)); Copy(dC, dD, s); using namespace mxnet_op; Kernel::Launch(s, dC.MSize(), DType(param.beta), dC.dptr_); @@ -708,10 +708,10 @@ struct gemm2_backward { const nnvm::NodeAttrs& attrs) { const LaMatrixMultParam& param = nnvm::get(attrs.parsed); bool tA(param.transpose_a), tB(param.transpose_b); - (tA ? gemm::op(B, dC, dA, DType(param.alpha), DType(0), tB, true, s) - : gemm::op(dC, B, dA, DType(param.alpha), DType(0), false, !tB, s)); - (tB ? gemm::op(dC, A, dB, DType(param.alpha), DType(0), true, tA, s) - : gemm::op(A, dC, dB, DType(param.alpha), DType(0), !tA, false, s)); + (tA ? gemm::op(B, dC, dA, DType(param.alpha), DType(0), tB, true, s) : + gemm::op(dC, B, dA, DType(param.alpha), DType(0), false, !tB, s)); + (tB ? gemm::op(dC, A, dB, DType(param.alpha), DType(0), true, tA, s) : + gemm::op(A, dC, dB, DType(param.alpha), DType(0), !tA, false, s)); } template static void op(const Tensor& dC, @@ -824,8 +824,8 @@ struct trsm_backward { // Compute dA const bool da_left(param.rightside == param.transpose); DType scale(-1.0 / param.alpha); - (da_left ? gemm::op(dB, C, dA, scale, DType(0), param.transpose, !param.transpose, s) - : gemm::op(C, dB, dA, scale, DType(0), !param.transpose, param.transpose, s)); + (da_left ? gemm::op(dB, C, dA, scale, DType(0), param.transpose, !param.transpose, s) : + gemm::op(C, dB, dA, scale, DType(0), !param.transpose, param.transpose, s)); using namespace mxnet_op; Kernel::Launch( s, dA.MSize(), dA.size(1) * dA.stride_, dA.stride_, dA.dptr_, !param.lower); diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h index dd993887e2c3..3d9eebdb0644 100644 --- a/src/operator/tensor/la_op.h +++ b/src/operator/tensor/la_op.h @@ -283,8 +283,8 @@ inline bool LaDiagTrianShape(const nnvm::NodeAttrs& attrs, if (ndim == 0) { return false; } - const int offset = (diag ? nnvm::get(attrs.parsed).offset - : nnvm::get(attrs.parsed).offset); + const int offset = (diag ? nnvm::get(attrs.parsed).offset : + nnvm::get(attrs.parsed).offset); std::vector oshape(extract ? ndim - 1 : ndim + 1); for (int i = 0; i < ndim - 1; ++i) { oshape[i] = (*in_attrs)[0][i]; @@ -710,8 +710,8 @@ void LaOpGemmForward(const nnvm::NodeAttrs& attrs, using namespace mshadow; CHECK_EQ(inputs.size(), inum); CHECK_EQ(outputs.size(), onum); - const int axis(inputs.size() == 2 ? nnvm::get(attrs.parsed).axis - : nnvm::get(attrs.parsed).axis); + const int axis(inputs.size() == 2 ? nnvm::get(attrs.parsed).axis : + nnvm::get(attrs.parsed).axis); MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { if (axis == -2 || axis == inputs[0].ndim() - 2) { LaOpCaller::op(inputs, outputs, attrs, ctx); @@ -732,8 +732,8 @@ void LaOpGemmBackward(const nnvm::NodeAttrs& attrs, Stream* s = ctx.get_stream(); CHECK_EQ(inputs.size(), inum); CHECK_EQ(outputs.size(), onum); - const int axis(inputs.size() == 3 ? nnvm::get(attrs.parsed).axis - : nnvm::get(attrs.parsed).axis); + const int axis(inputs.size() == 3 ? 
nnvm::get(attrs.parsed).axis : + nnvm::get(attrs.parsed).axis); MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { std::vector tspace(outputs); for (int i = 0; i < onum; ++i) { diff --git a/src/operator/tensor/matrix_op.cu b/src/operator/tensor/matrix_op.cu index fd8306a96edd..b5bd1c96d25b 100644 --- a/src/operator/tensor/matrix_op.cu +++ b/src/operator/tensor/matrix_op.cu @@ -187,8 +187,8 @@ __global__ void split_tensor_kernel(size_t input_size, LType* out_aligned = reinterpret_cast(params.outputs[section]); size_t section_size_aligned = entries_per_load > 0 ? section_size / entries_per_load : section_size; - size_t index_aligned = entries_per_load > 0 ? params.indices[section] / entries_per_load - : params.indices[section]; + size_t index_aligned = entries_per_load > 0 ? params.indices[section] / entries_per_load : + params.indices[section]; size_t output_offset_leading = (blockIdx.x / blocks_last_axis) * section_size_aligned; size_t output_position = output_offset_leading + position_last_axis_aligned - index_aligned; out_aligned[output_position] = input_data; @@ -330,9 +330,9 @@ inline void SplitOpForwardGPU(const nnvm::NodeAttrs& attrs, if (splitting_last_axis) { // may not be possible to include whole axis if too many sections last_axis_elements = - entries_per_load > 0 - ? ((params.indices[params.num_sections] - params.indices[0]) / entries_per_load) - : 0; + entries_per_load > 0 ? + ((params.indices[params.num_sections] - params.indices[0]) / entries_per_load) : + 0; } while (block_size < last_axis_elements && (block_size < max_threads_block)) { block_size += 32; diff --git a/src/operator/tensor/reduce_rtc.cc b/src/operator/tensor/reduce_rtc.cc index 5b6d89ebf774..bfa5d0a50e28 100644 --- a/src/operator/tensor/reduce_rtc.cc +++ b/src/operator/tensor/reduce_rtc.cc @@ -362,9 +362,9 @@ void RTCReduceImpl(Stream* s, args.emplace_back(¶m); args.emplace_back(&config.Mnext); - const auto& function_code = (lhs == nullptr) - ? (use_index ? reduce_function_index_code : reduce_function_code) - : reduce_function_use_input_code; + const auto& function_code = (lhs == nullptr) ? + (use_index ? reduce_function_index_code : reduce_function_code) : + reduce_function_use_input_code; const auto& kernel_name = (config.Mnext > 1) ? "reduce_kernel_multi" : "reduce_kernel_single"; auto reduce_kernel_func = get_function(code + function_code, kernel_name, reduce_kernel_code, dev_id); @@ -497,9 +497,9 @@ void RTCReduceM1Impl(Stream* s, args.emplace_back(&small.dptr_); args.emplace_back(¶m); - const auto& function_code = (lhs == nullptr) - ? (use_index ? reduce_function_index_code : reduce_function_code) - : reduce_function_use_input_code; + const auto& function_code = (lhs == nullptr) ? + (use_index ? 
reduce_function_index_code : reduce_function_code) : + reduce_function_use_input_code; auto reduce_kernel_M1_func = get_function(code + function_code, "reduce_kernel_M1", reduce_kernel_M1_code, dev_id); launch(reduce_kernel_M1_func, config.kernel_1.gridDim, config.kernel_1.blockDim, 0, s, &args); diff --git a/src/operator/tensor/square_sum.cc b/src/operator/tensor/square_sum.cc index 0ce48c6843f5..05917b6c1382 100644 --- a/src/operator/tensor/square_sum.cc +++ b/src/operator/tensor/square_sum.cc @@ -27,7 +27,7 @@ namespace mxnet { namespace op { template <> -void CheckSameIdx(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { +void CheckSameIdx(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { MSHADOW_IDX_TYPE_SWITCH(ograd_row_idx.type_flag_, IType, { diff --git a/src/operator/tensor/square_sum.cu b/src/operator/tensor/square_sum.cu index 92042e54206e..d41f0aa02918 100644 --- a/src/operator/tensor/square_sum.cu +++ b/src/operator/tensor/square_sum.cu @@ -27,7 +27,7 @@ namespace mxnet { namespace op { template <> -void CheckSameIdx(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { +void CheckSameIdx(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { MSHADOW_IDX_TYPE_SWITCH(ograd_row_idx.type_flag_, IType, { diff --git a/src/profiler/aggregate_stats.cc b/src/profiler/aggregate_stats.cc index 2cb8759e095a..78ae4d35ffb2 100644 --- a/src/profiler/aggregate_stats.cc +++ b/src/profiler/aggregate_stats.cc @@ -131,9 +131,9 @@ void AggregateStats::DumpTable(std::ostream& os, int sort_by, int ascending) { << " " << std::fixed << std::setw(16) << std::setprecision(4) << std::right << (is_memory ? ByteToKilobyte(data.max_aggregate_) : MicroToMilli(data.max_aggregate_)) << " " << std::fixed << std::setw(16) << std::setprecision(4) << std::right - << (data.type_ == AggregateStats::StatData::kCounter - ? ByteToKilobyte((data.max_aggregate_ - data.min_aggregate_) / 2) - : MicroToMilli(static_cast(data.total_aggregate_) / data.total_count_)); + << (data.type_ == AggregateStats::StatData::kCounter ? + ByteToKilobyte((data.max_aggregate_ - data.min_aggregate_) / 2) : + MicroToMilli(static_cast(data.total_aggregate_) / data.total_count_)); os << std::endl; } heap.pop(); @@ -181,9 +181,9 @@ void AggregateStats::DumpJson(std::ostream& os, int sort_by, int ascending) { << (is_memory ? ByteToKilobyte(data.max_aggregate_) : MicroToMilli(data.max_aggregate_)) << "," << std::endl << " \"Avg\": " << std::setprecision(4) - << (data.type_ == AggregateStats::StatData::kCounter - ? ByteToKilobyte((data.max_aggregate_ - data.min_aggregate_) / 2) - : MicroToMilli(static_cast(data.total_aggregate_) / data.total_count_)) + << (data.type_ == AggregateStats::StatData::kCounter ? + ByteToKilobyte((data.max_aggregate_ - data.min_aggregate_) / 2) : + MicroToMilli(static_cast(data.total_aggregate_) / data.total_count_)) << std::endl << " }" << std::endl; } diff --git a/src/runtime/container.cc b/src/runtime/container.cc index 50a284af56f7..2197c10abb3e 100644 --- a/src/runtime/container.cc +++ b/src/runtime/container.cc @@ -93,8 +93,8 @@ MXNET_REGISTER_GLOBAL("container._MapGetItem").set_body([](MXNetArgs args, MXNet CHECK(ptr->IsInstance()); auto* n = static_cast(ptr); - auto it = n->find(String::CanConvertFrom(args[1]) ? args[1].operator String() - : args[1].operator ObjectRef()); + auto it = n->find(String::CanConvertFrom(args[1]) ?
args[1].operator String() : + args[1].operator ObjectRef()); CHECK(it != n->end()) << "cannot find the corresponding key in the Map"; *rv = (*it).second; }); diff --git a/src/serialization/cnpy.cc b/src/serialization/cnpy.cc index 0534b3ae7459..bcd525c5e351 100644 --- a/src/serialization/cnpy.cc +++ b/src/serialization/cnpy.cc @@ -743,8 +743,8 @@ std::pair, std::vector> load_arrays( arrays.push_back(array); return_names.emplace_back(dirname.size() ? // Exclude "/" - dirname.substr(0, dirname.size() - 1) - : dirname); + dirname.substr(0, dirname.size() - 1) : + dirname); } else { throw std::runtime_error("Loading " + format + " sparse matrix format is unsupported."); @@ -881,8 +881,8 @@ std::pair, std::vector> load_arrays( arrays.push_back(array); return_names.emplace_back(dirname.size() ? // Exclude "/" - dirname.substr(0, dirname.size() - 1) - : dirname); + dirname.substr(0, dirname.size() - 1) : + dirname); } else { throw std::runtime_error("Loading " + format + " sparse matrix format is unsupported."); diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h index f6e60c56fbf8..9d1c3900ace1 100644 --- a/src/storage/pooled_storage_manager.h +++ b/src/storage/pooled_storage_manager.h @@ -207,7 +207,7 @@ void PooledStorageManager::Alloc(Storage::Hand #if MXNET_USE_CUDA dev_type_ == Context::kGPU ? cudaGetErrorString(static_cast(e)) : #endif - std::strerror(errno)); + std::strerror(errno)); LOG(FATAL) << "Memory allocation failed " << err; } From 2b58452402a6ad0f8cf9734c7b10673ccdf883f3 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:01:02 +0100 Subject: [PATCH 02/10] [TEST] Re-format .cc .h files --- tests/cpp/engine/engine_shutdown_test.cc | 17 +- tests/cpp/engine/omp_test.cc | 35 +- tests/cpp/engine/thread_local_test.cc | 64 +- tests/cpp/engine/threaded_engine_test.cc | 272 +++--- tests/cpp/include/test_core_op.h | 192 ++-- tests/cpp/include/test_legacy_op.h | 245 ++--- tests/cpp/include/test_ndarray_utils.h | 115 ++- tests/cpp/include/test_op.h | 90 +- tests/cpp/include/test_op_runner.h | 143 ++- tests/cpp/include/test_perf.h | 120 ++- tests/cpp/include/test_tune.h | 122 +-- tests/cpp/include/test_util.h | 250 +++-- tests/cpp/kvstore/gpu_topology_test.cc | 278 +++--- tests/cpp/misc/base.cc | 30 +- tests/cpp/operator/activation_perf.cc | 69 +- tests/cpp/operator/batchnorm_test.cc | 874 +++++++++--------- tests/cpp/operator/coreop_perf.cc | 61 +- tests/cpp/operator/dnnl_operator_test.cc | 4 +- tests/cpp/operator/dropout_perf.cc | 58 +- tests/cpp/operator/fully_conn_perf.cc | 62 +- tests/cpp/operator/krprod_test.cc | 115 ++- .../operator/runner/core_op_runner_test.cc | 196 ++-- tests/cpp/operator/slice_channel_perf.cc | 52 +- tests/cpp/operator/tune/operator_tune_test.cc | 66 +- tests/cpp/storage/storage_test.cc | 26 +- tests/cpp/test_main.cc | 23 +- 26 files changed, 1719 insertions(+), 1860 deletions(-) diff --git a/tests/cpp/engine/engine_shutdown_test.cc b/tests/cpp/engine/engine_shutdown_test.cc index 893d08502c3a..98830796e2cf 100644 --- a/tests/cpp/engine/engine_shutdown_test.cc +++ b/tests/cpp/engine/engine_shutdown_test.cc @@ -21,7 +21,7 @@ * Copyright (c) 2019 by Contributors * \file engine_shutdown_test.cc * \brief Tests engine shutdown for possible crashes -*/ + */ #include #include "../src/engine/engine_impl.h" @@ -29,13 +29,14 @@ /** * This test will help ensure we don't crash during engine shutdown. - * The crash happens during a static destructor call, so this test may pass and then cause a test-run process crash. 
+ * The crash happens during a static destructor call, so this test may pass and then cause a + * test-run process crash. */ TEST(EngineShutdown, stop_without_crashing) { - static std::unique_ptr ndArray; - { - auto engine = mxnet::Engine::_GetSharedRef(); - ndArray = std::make_unique(mxnet::Context::CPU()); - engine->Stop(); - } + static std::unique_ptr ndArray; + { + auto engine = mxnet::Engine::_GetSharedRef(); + ndArray = std::make_unique(mxnet::Context::CPU()); + engine->Stop(); + } } diff --git a/tests/cpp/engine/omp_test.cc b/tests/cpp/engine/omp_test.cc index f4ef421a8595..c6cb5c0470c6 100644 --- a/tests/cpp/engine/omp_test.cc +++ b/tests/cpp/engine/omp_test.cc @@ -28,24 +28,23 @@ #include #include - TEST(OMPBehaviour, after_fork) { - /* - * Check that after fork, OMP is disabled, and the recommended thread count is 1 to prevent - * process fanout. - */ - using namespace mxnet::engine; - auto openmp = OpenMP::Get(); - pid_t pid = fork(); - if (pid == 0) { - EXPECT_FALSE(openmp->enabled()); - EXPECT_EQ(openmp->GetRecommendedOMPThreadCount(), 1); - } else if (pid > 0) { - int status; - int ret = waitpid(pid, &status, 0); - CHECK_EQ(ret, pid) << "waitpid failed"; - } else { - CHECK(false) << "fork failed"; - } + /* + * Check that after fork, OMP is disabled, and the recommended thread count is 1 to prevent + * process fanout. + */ + using namespace mxnet::engine; + auto openmp = OpenMP::Get(); + pid_t pid = fork(); + if (pid == 0) { + EXPECT_FALSE(openmp->enabled()); + EXPECT_EQ(openmp->GetRecommendedOMPThreadCount(), 1); + } else if (pid > 0) { + int status; + int ret = waitpid(pid, &status, 0); + CHECK_EQ(ret, pid) << "waitpid failed"; + } else { + CHECK(false) << "fork failed"; + } } #endif diff --git a/tests/cpp/engine/thread_local_test.cc b/tests/cpp/engine/thread_local_test.cc index 6801b377ef83..a30577ef263d 100644 --- a/tests/cpp/engine/thread_local_test.cc +++ b/tests/cpp/engine/thread_local_test.cc @@ -21,7 +21,7 @@ * Copyright (c) 2019 by Contributors * \file engine_thread_local_test.cc * \brief Tests thread safety and lifetime of thread local store -*/ + */ #include #include #include @@ -37,44 +37,42 @@ #include struct A { - std::vector a; + std::vector a; }; -int num_threads = 10; +int num_threads = 10; int num_elements = num_threads * 10; static int ThreadSafetyTest(int num, std::vector* tmp_inputs, std::vector* res) { - A *ret = dmlc::ThreadLocalStore::Get(); - for (size_t i = num * 10; i < num * 10 + 10; ++i) { - (*tmp_inputs)[i] = i; - } - ret->a.clear(); - ret->a.reserve(10); - for (size_t i = num * 10; i < num * 10 + 10; ++i) { - ret->a.push_back((*tmp_inputs)[i]); - } - (*res)[num] = dmlc::BeginPtr(ret->a); - return 0; + A* ret = dmlc::ThreadLocalStore::Get(); + for (size_t i = num * 10; i < num * 10 + 10; ++i) { + (*tmp_inputs)[i] = i; + } + ret->a.clear(); + ret->a.reserve(10); + for (size_t i = num * 10; i < num * 10 + 10; ++i) { + ret->a.push_back((*tmp_inputs)[i]); + } + (*res)[num] = dmlc::BeginPtr(ret->a); + return 0; } TEST(ThreadLocal, VerifyThreadSafety) { - std::vector tmp_inputs; - tmp_inputs.resize(num_elements); - std::vector outputs; - outputs.resize(num_threads); - auto func = [&](int num) { - ThreadSafetyTest(num, &tmp_inputs, &outputs); - }; - std::vector worker_threads(num_threads); - int count = 0; - for (auto&& i : worker_threads) { - i = std::thread(func, count); - count++; - } - for (auto&& i : worker_threads) { - i.join(); - } + std::vector tmp_inputs; + tmp_inputs.resize(num_elements); + std::vector outputs; + outputs.resize(num_threads); + 
auto func = [&](int num) { ThreadSafetyTest(num, &tmp_inputs, &outputs); }; + std::vector worker_threads(num_threads); + int count = 0; + for (auto&& i : worker_threads) { + i = std::thread(func, count); + count++; + } + for (auto&& i : worker_threads) { + i.join(); + } - for (size_t i = 0; i < num_elements; i++) { - CHECK(outputs[i/10][i%10] == i); - } + for (size_t i = 0; i < num_elements; i++) { + CHECK(outputs[i / 10][i % 10] == i); + } } diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc index 465e387b8d42..5b86781d6838 100644 --- a/tests/cpp/engine/threaded_engine_test.cc +++ b/tests/cpp/engine/threaded_engine_test.cc @@ -21,7 +21,7 @@ * Copyright (c) 2017 by Contributors * \file threaded_engine_test.cc * \brief threaded engine tests -*/ + */ #include #include #include @@ -57,9 +57,12 @@ static uint32_t seed_ = 0xdeadbeef; /** * generate a list of workloads */ -void GenerateWorkload(int num_workloads, int num_var, - int min_read, int max_read, - int min_time, int max_time, +void GenerateWorkload(int num_workloads, + int num_var, + int min_read, + int max_read, + int min_time, + int max_time, std::vector* workloads) { workloads->clear(); workloads->resize(num_workloads); @@ -68,8 +71,8 @@ void GenerateWorkload(int num_workloads, int num_var, std::uniform_int_distribution distribution_time(min_time, max_time - 1); std::uniform_int_distribution distribution_read(min_read, max_read - 1); for (int i = 0; i < num_workloads; ++i) { - auto& wl = workloads->at(i); - wl.write = distribution_var(generator); + auto& wl = workloads->at(i); + wl.write = distribution_var(generator); int num_read = distribution_read(generator); for (int j = 0; j < num_read; ++j) { wl.reads.push_back(distribution_var(generator)); @@ -83,7 +86,8 @@ void GenerateWorkload(int num_workloads, int num_var, */ void EvaluateWorkload(const Workload& wl, std::vector* data) { double tmp = 0; - for (int i : wl.reads) tmp += data->at(i); + for (int i : wl.reads) + tmp += data->at(i); data->at(wl.write) = tmp / (wl.reads.size() + 1); if (wl.time > 0) { std::this_thread::sleep_for(std::chrono::microseconds(wl.time)); @@ -106,7 +110,8 @@ double EvaluateWorkloads(const std::vector& workloads, } for (const auto& wl : workloads) { - if (wl.reads.size() == 0) continue; + if (wl.reads.size() == 0) + continue; if (engine == nullptr) { EvaluateWorkload(wl, data); } else { @@ -119,7 +124,8 @@ double EvaluateWorkloads(const std::vector& workloads, }; std::vector reads; for (auto i : wl.reads) { - if (i != wl.write) reads.push_back(vars[i]); + if (i != wl.write) + reads.push_back(vars[i]); } engine->PushAsync(func, Context::CPU(), reads, {vars[wl.write]}); } @@ -134,9 +140,9 @@ double EvaluateWorkloads(const std::vector& workloads, TEST(Engine, start_stop) { const int num_engine = 3; std::vector engine(num_engine); - engine[0] = mxnet::engine::CreateNaiveEngine(); - engine[1] = mxnet::engine::CreateThreadedEnginePooled(); - engine[2] = mxnet::engine::CreateThreadedEnginePerDevice(); + engine[0] = mxnet::engine::CreateNaiveEngine(); + engine[1] = mxnet::engine::CreateThreadedEnginePooled(); + engine[2] = mxnet::engine::CreateThreadedEnginePerDevice(); std::string type_names[3] = {"NaiveEngine", "ThreadedEnginePooled", "ThreadedEnginePerDevice"}; for (int i = 0; i < num_engine; ++i) { @@ -150,7 +156,7 @@ TEST(Engine, start_stop) { TEST(Engine, RandSumExpr) { std::vector workloads; - int num_repeat = 5; + int num_repeat = 5; const int num_engine = 4; std::vector t(num_engine, 0.0); @@ -172,19 +178,21 
@@ TEST(Engine, RandSumExpr) { } for (int i = 1; i < num_engine; ++i) { - for (int j = 0; j < num_var; ++j) EXPECT_EQ(data[0][j], data[i][j]); + for (int j = 0; j < num_var; ++j) + EXPECT_EQ(data[0][j], data[i][j]); } LOG(INFO) << "data: " << data[0][1] << " " << data[0][2] << "..."; } - - LOG(INFO) << "baseline\t\t" << t[0] << " sec"; - LOG(INFO) << "NaiveEngine\t\t" << t[1] << " sec"; + LOG(INFO) << "baseline\t\t" << t[0] << " sec"; + LOG(INFO) << "NaiveEngine\t\t" << t[1] << " sec"; LOG(INFO) << "ThreadedEnginePooled\t" << t[2] << " sec"; LOG(INFO) << "ThreadedEnginePerDevice\t" << t[3] << " sec"; } -void Foo(mxnet::RunContext, int i) { printf("The fox says %d\n", i); } +void Foo(mxnet::RunContext, int i) { + printf("The fox says %d\n", i); +} void FooAsyncFunc(void*, void*, void* cb_ptr, void* param) { if (param == nullptr) { @@ -222,7 +230,7 @@ TEST(Engine, PushFunc) { // Test #1 LOG(INFO) << "===== Test #1: PushAsync param and deleter ====="; - int* a = new int(100); + int* a = new int(100); int res = MXEnginePushAsync(FooAsyncFunc, a, FooFuncDeleter, &ctx, &var, 1, nullptr, 0); EXPECT_EQ(res, 0); @@ -244,7 +252,7 @@ TEST(Engine, PushFunc) { // Test #5 LOG(INFO) << "===== Test #5: PushSync param and deleter ====="; int* b = new int(101); - res = MXEnginePushSync(FooSyncFunc, b, FooFuncDeleter, &ctx, &var, 1, nullptr, 0); + res = MXEnginePushSync(FooSyncFunc, b, FooFuncDeleter, &ctx, &var, 1, nullptr, 0); EXPECT_EQ(res, 0); // Test #6 @@ -268,82 +276,121 @@ TEST(Engine, PushFuncND) { std::vector nds; const int num_nds = 5; for (int i = 0; i < num_nds; ++i) { - mxnet::NDArray *pnd = new mxnet::NDArray(ctx); - nds.push_back(pnd); + mxnet::NDArray* pnd = new mxnet::NDArray(ctx); + nds.push_back(pnd); } for (int num_const_nds = 0; num_const_nds <= num_nds; ++num_const_nds) { - int num_mutable_nds = num_nds - num_const_nds; - void** const_nds_handle = num_const_nds > 0 ? - reinterpret_cast(nds.data()) : nullptr; - void** mutable_nds_handle = num_mutable_nds > 0 ? 
- reinterpret_cast(nds.data() + num_const_nds) : nullptr; - - // Test #1 - LOG(INFO) << "===== Test #1: PushAsyncND param and deleter ====="; - int* a = new int(100); - int res = MXEnginePushAsyncND(FooAsyncFunc, a, FooFuncDeleter, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, 0); - - // Test #2 - LOG(INFO) << "===== Test #2: PushAsyncND NULL param and NULL deleter ====="; - res = MXEnginePushAsyncND(FooAsyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, 0); - - // Test #3 - LOG(INFO) << "===== Test #3: PushAsyncND invalid number of const nds ====="; - res = MXEnginePushAsyncND(FooAsyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, -1, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, -1); - - // Test #4 - LOG(INFO) << "===== Test #4: PushAsyncND invalid number of mutable nds ====="; - res = MXEnginePushAsyncND(FooAsyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, -1); - EXPECT_EQ(res, -1); - - // Test #5 - LOG(INFO) << "===== Test #5: PushSyncND param and deleter ====="; - int* b = new int(101); - res = MXEnginePushSyncND(FooSyncFunc, b, FooFuncDeleter, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, 0); - - // Test #6 - LOG(INFO) << "===== Test #6: PushSyncND NULL param and NULL deleter ====="; - res = MXEnginePushSyncND(FooSyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, 0); - - // Test #7 - LOG(INFO) << "===== Test #7: PushSyncND invalid number of const nds ====="; - res = MXEnginePushSyncND(FooSyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, -1, - mutable_nds_handle, num_mutable_nds); - EXPECT_EQ(res, -1); - - // Test #8 - LOG(INFO) << "===== Test #8: PushSyncND invalid number of mutable nds ====="; - res = MXEnginePushSyncND(FooSyncFunc, nullptr, nullptr, &ctx, - const_nds_handle, num_const_nds, - mutable_nds_handle, -1); - EXPECT_EQ(res, -1); + int num_mutable_nds = num_nds - num_const_nds; + void** const_nds_handle = num_const_nds > 0 ? reinterpret_cast(nds.data()) : nullptr; + void** mutable_nds_handle = + num_mutable_nds > 0 ? 
reinterpret_cast(nds.data() + num_const_nds) : nullptr; + + // Test #1 + LOG(INFO) << "===== Test #1: PushAsyncND param and deleter ====="; + int* a = new int(100); + int res = MXEnginePushAsyncND(FooAsyncFunc, + a, + FooFuncDeleter, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, 0); + + // Test #2 + LOG(INFO) << "===== Test #2: PushAsyncND NULL param and NULL deleter ====="; + res = MXEnginePushAsyncND(FooAsyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, 0); + + // Test #3 + LOG(INFO) << "===== Test #3: PushAsyncND invalid number of const nds ====="; + res = MXEnginePushAsyncND(FooAsyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + -1, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, -1); + + // Test #4 + LOG(INFO) << "===== Test #4: PushAsyncND invalid number of mutable nds ====="; + res = MXEnginePushAsyncND(FooAsyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + -1); + EXPECT_EQ(res, -1); + + // Test #5 + LOG(INFO) << "===== Test #5: PushSyncND param and deleter ====="; + int* b = new int(101); + res = MXEnginePushSyncND(FooSyncFunc, + b, + FooFuncDeleter, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, 0); + + // Test #6 + LOG(INFO) << "===== Test #6: PushSyncND NULL param and NULL deleter ====="; + res = MXEnginePushSyncND(FooSyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, 0); + + // Test #7 + LOG(INFO) << "===== Test #7: PushSyncND invalid number of const nds ====="; + res = MXEnginePushSyncND(FooSyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + -1, + mutable_nds_handle, + num_mutable_nds); + EXPECT_EQ(res, -1); + + // Test #8 + LOG(INFO) << "===== Test #8: PushSyncND invalid number of mutable nds ====="; + res = MXEnginePushSyncND(FooSyncFunc, + nullptr, + nullptr, + &ctx, + const_nds_handle, + num_const_nds, + mutable_nds_handle, + -1); + EXPECT_EQ(res, -1); } for (mxnet::NDArray* pnd : nds) { - delete pnd; + delete pnd; } } TEST(Engine, basics) { auto&& engine = mxnet::Engine::Get(); - auto&& var = engine->NewVariable(); + auto&& var = engine->NewVariable(); std::vector oprs; // Test #1 @@ -460,9 +507,9 @@ TEST(Engine, basics) { TEST(Engine, VarVersion) { const size_t num_engines = 3; std::vector engines(num_engines); - engines[0] = mxnet::engine::CreateNaiveEngine(); - engines[1] = mxnet::engine::CreateThreadedEnginePooled(); - engines[2] = mxnet::engine::CreateThreadedEnginePerDevice(); + engines[0] = mxnet::engine::CreateNaiveEngine(); + engines[1] = mxnet::engine::CreateThreadedEnginePooled(); + engines[2] = mxnet::engine::CreateThreadedEnginePerDevice(); std::string type_names[3] = {"NaiveEngine", "ThreadedEnginePooled", "ThreadedEnginePerDevice"}; for (size_t k = 0; k < num_engines; ++k) { auto engine = engines[k]; @@ -534,7 +581,7 @@ struct TestSaveAndRestoreOMPState { omp_set_dynamic(dynamic_); } const int nthreads_ = omp_get_max_threads(); - const int dynamic_ = omp_get_dynamic(); + const int dynamic_ = omp_get_dynamic(); }; /*! 
@@ -542,8 +589,8 @@ struct TestSaveAndRestoreOMPState { */ TEST(Engine, omp_threading_count_scope) { TestSaveAndRestoreOMPState omp_state; - const int THREAD_COUNT = 10; - std::shared_ptr ready = std::make_shared(); + const int THREAD_COUNT = 10; + std::shared_ptr ready = std::make_shared(); std::shared_ptr threads = std::make_shared(); std::atomic counter(0), correct(0); omp_set_dynamic(0); @@ -551,24 +598,27 @@ TEST(Engine, omp_threading_count_scope) { std::string name = "thread: "; name += std::to_string(x + 1); ++counter; - threads->create(name, false, - [x, &counter, &correct](std::shared_ptr ready_ptr) -> int { - const int thread_count = x + 1; - omp_set_num_threads(thread_count); - --counter; - ready_ptr->wait(); - CHECK_EQ(omp_get_max_threads(), thread_count); - #pragma omp parallel for - for (int i = 0; i < 100; ++i) { - if (i == 50) { - const int current_threads = omp_get_num_threads(); - if (current_threads == thread_count) { - ++correct; - } - } - } - return 0; - }, ready); + threads->create( + name, + false, + [x, &counter, &correct](std::shared_ptr ready_ptr) -> int { + const int thread_count = x + 1; + omp_set_num_threads(thread_count); + --counter; + ready_ptr->wait(); + CHECK_EQ(omp_get_max_threads(), thread_count); +#pragma omp parallel for + for (int i = 0; i < 100; ++i) { + if (i == 50) { + const int current_threads = omp_get_num_threads(); + if (current_threads == thread_count) { + ++correct; + } + } + } + return 0; + }, + ready); } while (counter.load() > 0) { usleep(100); diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h index ecbfcd5d7d3a..0ff089cc5666 100644 --- a/tests/cpp/include/test_core_op.h +++ b/tests/cpp/include/test_core_op.h @@ -34,16 +34,13 @@ namespace test { namespace op { // Tried making this a struct w/constexpr, but getting undefined reference on gcc 5.4.1 -#define COREOP_FWD_OP_NAME_KEY "fwd_op_name" -#define COREOP_BWD_OP_NAME_KEY "bwd_op_name" -#define COREOP_BWD_OP_NAME_VALUE_NONE "[none]" +#define COREOP_FWD_OP_NAME_KEY "fwd_op_name" +#define COREOP_BWD_OP_NAME_KEY "bwd_op_name" +#define COREOP_BWD_OP_NAME_VALUE_NONE "[none]" -enum TimingDirection { - kForward, - kBackward -}; +enum TimingDirection { kForward, kBackward }; -inline const char *TimingDirectionAsString(const TimingDirection td) { +inline const char* TimingDirectionAsString(const TimingDirection td) { switch (td) { case kForward: return "Forward"; @@ -59,9 +56,9 @@ inline const char *TimingDirectionAsString(const TimingDirection td) { * Low-noise operator executor * @tparam DType Data type for the operator executions */ -template -class CoreOpExecutor : public test::op::OperatorDataInitializer - , public test::op::OperatorExecutorTiming { +template +class CoreOpExecutor : public test::op::OperatorDataInitializer, + public test::op::OperatorExecutorTiming { /*! \brief Performance timing categories */ /*! 
* \brief Parse additional arguments into NodeAttrs structure @@ -69,13 +66,13 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param args vector of string pairs representing argument key/value pairs * \return Constructed NodeAttrs structure */ - static nnvm::NodeAttrs ParseAttrs(const nnvm::Op *op, const kwargs_t& args) { + static nnvm::NodeAttrs ParseAttrs(const nnvm::Op* op, const kwargs_t& args) { const size_t count = args.size(); - std::vector keys, values; + std::vector keys, values; keys.reserve(count); values.reserve(count); - for (kwargs_t::const_iterator i_iter = args.begin(), e_iter = args.end(); - i_iter != e_iter; ++i_iter) { + for (kwargs_t::const_iterator i_iter = args.begin(), e_iter = args.end(); i_iter != e_iter; + ++i_iter) { keys.emplace_back(i_iter->first.c_str()); values.emplace_back(i_iter->second.c_str()); } @@ -89,7 +86,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \return Reference to the supplied vector of TBlob results */ static inline std::vector& CollectBlobs(const std::vector& src, - std::vector *dest) { + std::vector* dest) { dest->resize(0); dest->reserve(dest->size() + src.size()); for (size_t i = 0, n = src.size(); i < n; ++i) { @@ -128,7 +125,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer nnvm::ObjectPtr MakeNode() const { nnvm::ObjectPtr node = nnvm::Node::Create(); - node->attrs = attrs_; + node->attrs = attrs_; return node; } @@ -138,7 +135,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer */ std::vector, std::string>> GetBackward() { std::vector, std::string>> res; - static auto gradient = nnvm::Op::GetAttr("FGradient"); + static auto gradient = nnvm::Op::GetAttr("FGradient"); nnvm::FGradient grad_fun = gradient.get(op_, nullptr); if (grad_fun) { auto n = MakeNode(); @@ -154,8 +151,8 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer std::cout << node_entry.node->op()->name << std::endl; } std::shared_ptr pOp = std::make_shared( - ctx().run_ctx.ctx.dev_type == Context::kGPU, ShapesOf(outputs())); - res.push_back({ pOp, node_entry.node->op()->name }); + ctx().run_ctx.ctx.dev_type == Context::kGPU, ShapesOf(outputs())); + res.push_back({pOp, node_entry.node->op()->name}); } } return res; @@ -167,10 +164,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param attrs NodeAttrs structure (node attributes) * \param op Pointer to nnvm Operator object */ - void AttachResources(OpContext *ctx, const nnvm::NodeAttrs& attrs, const nnvm::Op *op) { + void AttachResources(OpContext* ctx, const nnvm::NodeAttrs& attrs, const nnvm::Op* op) { std::vector reqs; std::vector& requested = ctx->requested; - static auto& fresource = nnvm::Op::GetAttr("FResourceRequest"); + static auto& fresource = nnvm::Op::GetAttr("FResourceRequest"); if (fresource.count(op) != 0) { reqs = fresource[op](attrs); } else { @@ -218,7 +215,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer } public: - typedef DType DataType; + typedef DType DataType; typedef AccReal AccRealType; /*! 
\brief Add 'fwd_op_name' to kwargs and return the new kwargs */ @@ -233,9 +230,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer new_args.emplace_back(a); } } - new_args.push_back({ COREOP_FWD_OP_NAME_KEY, fwd_op_name}); + new_args.push_back({COREOP_FWD_OP_NAME_KEY, fwd_op_name}); if (!bwd_op_name.empty()) { - new_args.push_back({ COREOP_BWD_OP_NAME_KEY, bwd_op_name}); + new_args.push_back({COREOP_BWD_OP_NAME_KEY, bwd_op_name}); } return new_args; } @@ -267,11 +264,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param shapes Array of input shapes */ CoreOpExecutor(const bool isGPU, const mxnet::ShapeVector& shapes) - : input_shapes_(shapes) - , op_(nullptr) { - ctx_.is_train = true; - ctx_.run_ctx.ctx.dev_id = 0; - ctx_.run_ctx.stream = nullptr; + : input_shapes_(shapes), op_(nullptr) { + ctx_.is_train = true; + ctx_.run_ctx.ctx.dev_id = 0; + ctx_.run_ctx.stream = nullptr; ctx_.run_ctx.ctx.dev_type = Context::kCPU; #if MXNET_USE_CUDA if (isGPU) { @@ -300,7 +296,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer } nnvm::ObjectPtr GetBackwardDependency(const nnvm::ObjectPtr& node, - std::map* index2array) const { + std::map* index2array) const { index2array->clear(); static auto& fgradient = nnvm::Op::GetAttr("FGradient"); @@ -331,9 +327,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer return nullptr; } - nnvm::ObjectPtr CalcBackwardPass(std::map *index2array) const { + nnvm::ObjectPtr CalcBackwardPass(std::map* index2array) const { nnvm::ObjectPtr node = nnvm::Node::Create(); - node->attrs = attrs_; + node->attrs = attrs_; return GetBackwardDependency(node, index2array); } @@ -343,11 +339,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param inputs Optional input data (otherwise, random data will be used as input) */ void Init(const kwargs_t& in_args, - const std::vector& inputs = {}, - const std::vector& outputs = {}, - const CoreOpExecutor *backward_for_op = nullptr, - nnvm::ObjectPtr bwd_node_ptr = nullptr - ) { + const std::vector& inputs = {}, + const std::vector& outputs = {}, + const CoreOpExecutor* backward_for_op = nullptr, + nnvm::ObjectPtr bwd_node_ptr = nullptr) { if (!initialized_) { initialized_ = true; @@ -356,7 +351,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer CHECK(op_name.empty() == false); CHECK(!backward_for_op || bwd_op_name.empty()) - << "Backward op should not be supplied another backward operator"; + << "Backward op should not be supplied another backward operator"; if (verbose_ && backward_for_op) { std::cout << "Backward op: " << op_name; @@ -365,7 +360,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer op_ = nnvm::Op::Get(op_name); CHECK_NOTNULL(op_); - std::map index2array; + std::map index2array; nnvm::ObjectPtr bwd_node_ptr; if (backward_for_op) { bwd_node_ptr = backward_for_op->CalcBackwardPass(&index2array); @@ -400,12 +395,12 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer std::vector input_shapes; if (!input_shapes_.empty()) { for (size_t i = 0, n = num_inputs; i < n; ++i) { - input_shapes.emplace_back(i < input_shapes_.size() ? input_shapes_[i] - : input_shapes_[input_shapes_.size() - - 1]); + input_shapes.emplace_back(i < input_shapes_.size() ? 
+ input_shapes_[i] : + input_shapes_[input_shapes_.size() - 1]); } } - std::vector inputs_p, outputs_p; + std::vector inputs_p, outputs_p; if (!outputs.empty()) { CHECK_EQ(outputs.size(), static_cast(inferred_num_outputs)); @@ -438,9 +433,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer const int map_key = bwd_node_ptr->inputs[i].index; CHECK(index2array.find(map_key) != index2array.end()); const int dtype = index2array[map_key]->dtype(); - input_types[i] = dtype; + input_types[i] = dtype; } - for (const auto &fwd_inp : backward_for_op->inputs()) { + for (const auto& fwd_inp : backward_for_op->inputs()) { const int dtype = fwd_inp.data().type_flag_; output_types.emplace_back(dtype); } @@ -448,7 +443,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer for (int x = 0; x < num_inputs; ++x) { input_types.emplace_back(default_dtype()); } - for (const auto &fwd_inp : backward_for_op->inputs()) { + for (const auto& fwd_inp : backward_for_op->inputs()) { const int dtype = fwd_inp.data().type_flag_; output_types.emplace_back(dtype); } @@ -482,7 +477,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer for (int i = 0; i < num_inputs; ++i) { const int map_key = bwd_node_ptr->inputs[i].index; CHECK(index2array.find(map_key) != index2array.end()); - const mxnet::TShape &shp = index2array[map_key]->shape(); + const mxnet::TShape& shp = index2array[map_key]->shape(); input_shapes.push_back(shp); const mxnet::TShape ss = input_shapes[i]; } @@ -503,22 +498,21 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer for (size_t i = 0; i < static_cast(inferred_num_outputs); ++i) { // If supplied and valid, pass from the supplied outputs vector // Otherwise use empty for forward pass, or zero-filled for backward pass - outputs_.emplace_back(i < outputs.size() ? outputs[i] - : (backward_for_op - ? CreateZeroArray(output_shapes[i], - ctx_.run_ctx, - output_types[i]) - : NDArray())); + outputs_.emplace_back( + i < outputs.size() ? + outputs[i] : + (backward_for_op ? + CreateZeroArray(output_shapes[i], ctx_.run_ctx, output_types[i]) : + NDArray())); outputs_p.emplace_back(&*outputs_.rbegin()); } } for (size_t i = 0; i < static_cast(num_inputs); ++i) { CHECK_LT(i, static_cast(input_shapes.size())); - inputs_.emplace_back(i < inputs.size() - ? inputs[i] : CreateRandArray(input_shapes[i], - ctx_.run_ctx, - input_types[i])); + inputs_.emplace_back(i < inputs.size() ? 
+ inputs[i] : + CreateRandArray(input_shapes[i], ctx_.run_ctx, input_types[i])); inputs_p.emplace_back(&*inputs_.rbegin()); } @@ -533,15 +527,15 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer CollectBlobs(inputs_, &blob_inputs_); CollectBlobs(outputs_, &blob_outputs_); - function_ = common::GetFCompute(op_, "FCompute", ctx_.run_ctx.ctx); + function_ = common::GetFCompute(op_, "FCompute", ctx_.run_ctx.ctx); functionex_ = common::GetFCompute(op_, "FComputeEx", ctx_.run_ctx.ctx); - stateful_function_ = common::GetFCompute(op_, "FStatefulCompute", - ctx_.run_ctx.ctx); + stateful_function_ = + common::GetFCompute(op_, "FStatefulCompute", ctx_.run_ctx.ctx); AttachResources(&ctx_, attrs_, op_); auto& is_layer_backward = Op::GetAttr("TIsLayerOpBackward"); - auto& createop = nnvm::Op::GetAttr("FCreateOpState"); + auto& createop = nnvm::Op::GetAttr("FCreateOpState"); if (createop.count(op_) || is_layer_backward.get(op_, false)) { if (backward_for_op) { state_ = backward_for_op->state_; @@ -562,7 +556,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer if (bwd_op_name != COREOP_BWD_OP_NAME_VALUE_NONE) { // Backward op was specified std::shared_ptr pOp = std::make_shared( - ctx().run_ctx.ctx.dev_type == Context::kGPU, ShapesOf(this->outputs())); + ctx().run_ctx.ctx.dev_type == Context::kGPU, ShapesOf(this->outputs())); bwd.push_back({pOp, bwd_op_name}); } else { no_backward = true; @@ -573,9 +567,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer } if (!no_backward) { CHECK_GE(bwd.size(), 1U) - << "Can't automatically determine backward op name. Please specify"; + << "Can't automatically determine backward op name. Please specify"; - for (std::pair, std::string> &bw_item : bwd) { + for (std::pair, std::string>& bw_item : bwd) { bw_item.first->set_verbose(verbose_); backward_.emplace_back(bw_item.first); bw_item.first->Init(ArgsWithOpName(args, bw_item.second), {}, {}, this); @@ -585,15 +579,15 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer } } - template - inline bool initForward(const OpProp &opProp, std::vector *in_type) { + template + inline bool initForward(const OpProp& opProp, std::vector* in_type) { Init(opProp.GetArgs()); resetForward(); return true; } - template - inline bool initBackward(const OpProp &opProp, std::vector *in_type) { + template + inline bool initBackward(const OpProp& opProp, std::vector* in_type) { resetBackward(); return true; } @@ -670,7 +664,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer CHECK(HasBackward()); if (!backward_.empty()) { // Avoid locked ref count here - for (std::shared_ptr &p : backward_) { + for (std::shared_ptr& p : backward_) { p->Execute(); } return true; @@ -686,7 +680,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer CHECK(HasBackward()); if (!backward_.empty()) { // Avoid locked ref count here - for (std::shared_ptr &p : backward_) { + for (std::shared_ptr& p : backward_) { p->ExecuteEx(); } return true; @@ -702,7 +696,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer CHECK(HasBackward()); if (!backward_.empty()) { // Avoid locked ref count here - for (std::shared_ptr &p : backward_) { + for (std::shared_ptr& p : backward_) { p->ExecuteStateful(); } return true; @@ -714,19 +708,35 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \brief Access input NDArray vector * \return reference to NDArray vector of forward inputs */ - std::vector& inputs() { return inputs_; } - const std::vector& inputs() 
const { return inputs_; }
-  std::vector& input_blobs() { return blob_inputs_; }
-  const std::vector& input_blobs() const { return blob_inputs_; }
+  std::vector& inputs() {
+    return inputs_;
+  }
+  const std::vector& inputs() const {
+    return inputs_;
+  }
+  std::vector& input_blobs() {
+    return blob_inputs_;
+  }
+  const std::vector& input_blobs() const {
+    return blob_inputs_;
+  }

   /*!
    * \brief Access input NDArray vector
    * \return reference to NDArray vector of forward outputs
    */
-  std::vector& outputs() { return outputs_; }
-  const std::vector& outputs() const { return outputs_; }
-  std::vector& output_blobs() { return blob_outputs_; }
-  const std::vector& output_blobs() const { return blob_outputs_; }
+  std::vector& outputs() {
+    return outputs_;
+  }
+  const std::vector& outputs() const {
+    return outputs_;
+  }
+  std::vector& output_blobs() {
+    return blob_outputs_;
+  }
+  const std::vector& output_blobs() const {
+    return blob_outputs_;
+  }

   /*!
    * \brief Backward inputs (i.e. output grad)
@@ -792,7 +802,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer
   /*
    * \brief Pointer to the operator object
    */
-  const nnvm::Op *op_;
+  const nnvm::Op* op_;
   /*!
    * \brief Operator attributes
    */
@@ -838,17 +848,21 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer

 class CoreOpProp {
  public:
-  virtual void Init(const kwargs_t& kwargs) { kwargs_ = kwargs; }
-  const kwargs_t& GetArgs() const { return kwargs_; }
+  virtual void Init(const kwargs_t& kwargs) {
+    kwargs_ = kwargs;
+  }
+  const kwargs_t& GetArgs() const {
+    return kwargs_;
+  }
   virtual ~CoreOpProp() {}
+
  private:
-  kwargs_t kwargs_;
+  kwargs_t kwargs_;
 };

-template
+template
 using CoreOperatorRunner = test::OperatorRunner>;

-
 /*!
  * \brief Run a core op forward and backward
  * \tparam DType Data type
  * \tparam AccReal
  * \param isGPU true if operation is to be run against gpu
  * \param op_kwargs Operator parameters
  * \param shapes Array of input shapes
  * \param op_name Name of operator
  * \param backward_op_name Name of backward operator (optional)
  */
-template
+template
 inline void BasicRunCoreOpBidirectional(const bool isGPU,
                                         bool verbose,
                                         const kwargs_t& op_kwargs,
                                         const mxnet::ShapeVector& shapes,
-                                        const char *op_name,
-                                        const char *backward_op_name = "") {
+                                        const char* op_name,
+                                        const char* backward_op_name = "") {
   test::op::CoreOpExecutor op(isGPU, shapes);
   op.set_verbose(verbose);
diff --git a/tests/cpp/include/test_legacy_op.h b/tests/cpp/include/test_legacy_op.h
index fdb52cf6e4e0..4259751b71c4 100644
--- a/tests/cpp/include/test_legacy_op.h
+++ b/tests/cpp/include/test_legacy_op.h
@@ -60,8 +60,8 @@ namespace op {
  * \tparam DType
  */
 template
-class LegacyOperatorExecutor : public OperatorDataInitializer
-                             , public OperatorExecutorTiming {
+class LegacyOperatorExecutor : public OperatorDataInitializer,
+                               public OperatorExecutorTiming {
  public:
   typedef DType DataType;
   typedef AccReal AccRealType;
@@ -69,14 +69,17 @@ class LegacyOperatorExecutor : public OperatorDataInitializer
   /*!
\brief Manage test blobs and context */ LegacyOperatorExecutor(const bool isGPU, const mxnet::ShapeVector& topShapes) #if !MXNET_USE_CUDA - : isGPU_(false) + : isGPU_(false) #else - : isGPU_(isGPU) + : isGPU_(isGPU) #endif - , initializeForward_(0) // unit testing may call inits in any order based - , initializeBackward_(0) // upon its use-case (ie may not want to run forward pass first) - , initializeCallback_(0) { - opContext_.is_train = true; + , + initializeForward_(0) // unit testing may call inits in any order based + , + initializeBackward_(0) // upon its use-case (ie may not want to run forward pass first) + , + initializeCallback_(0) { + opContext_.is_train = true; opContext_.run_ctx.stream = nullptr; CHECK(!topShapes.empty()); shape_input_vec_ = topShapes; @@ -93,14 +96,14 @@ class LegacyOperatorExecutor : public OperatorDataInitializer virtual void resetBackward() {} /*! \brief Initialize auxiliary and output blobs */ - template - bool initForward(const OperatorPropertyType &opProp, std::vector *in_type) { + template + bool initForward(const OperatorPropertyType& opProp, std::vector* in_type) { if (!initializeForward_++) { shape_input_vec_.resize(opProp.ListArguments().size()); op_.reset(opProp.CreateOperatorEx(getContext(), &shape_input_vec_, in_type)); if (op_) { const size_t output_count = opProp.ListOutputs().size(); - const size_t aux_count = opProp.ListAuxiliaryStates().size(); + const size_t aux_count = opProp.ListAuxiliaryStates().size(); // Figure out what sort of blobs we need to allocate mxnet::ShapeVector out_shape, aux_shape; out_shape.resize(output_count); @@ -150,19 +153,23 @@ class LegacyOperatorExecutor : public OperatorDataInitializer } /*! \brief Initialize auxiliary and output blobs */ - template - bool initBackward(const OperatorPropertyType &opProp, std::vector *in_type) { + template + bool initBackward(const OperatorPropertyType& opProp, std::vector* in_type) { initForward(opProp, in_type); if (!initializeBackward_++) { for (size_t x = 0, n = static_cast(opProp.NumVisibleOutputs()); x < n; ++x) { CHECK_LT(x, c_.blob_output_vec_.size()); - allocateBlob(&c_.blob_out_grad_, c_.blob_output_vec_[x].shape_, - false, c_.blob_output_vec_[x].type_flag_); + allocateBlob(&c_.blob_out_grad_, + c_.blob_output_vec_[x].shape_, + false, + c_.blob_output_vec_[x].type_flag_); } for (size_t x = 0, n = c_.blob_input_vec_.size(); x < n; ++x) { - allocateBlob(&c_.blob_in_grad_, c_.blob_input_vec_[x].shape_, - false, c_.blob_input_vec_[x].type_flag_); + allocateBlob(&c_.blob_in_grad_, + c_.blob_input_vec_[x].shape_, + false, + c_.blob_input_vec_[x].type_flag_); } // Get the resource of temporal space @@ -180,18 +187,14 @@ class LegacyOperatorExecutor : public OperatorDataInitializer void forward(const size_t count = 1) { const std::vector req(c_.blob_output_vec_.size(), kWriteTo); // Possibly move data to/from CPU and GPU (outside of timing scope) - MXNET_CUDA_ONLY(std::unique_ptr gpuData(isGPU_ ? - new GPUOpData(c_, &opContext_) : nullptr)); - perf::TimingItem timeF(&OperatorExecutorTiming::GetTiming(), Forward, - "Forward", count); + MXNET_CUDA_ONLY( + std::unique_ptr gpuData(isGPU_ ? 
new GPUOpData(c_, &opContext_) : nullptr)); + perf::TimingItem timeF(&OperatorExecutorTiming::GetTiming(), Forward, "Forward", count); if (!isGPU_) { mxnet::profiler::vtune::VTuneResume profile; // VTune sample only this scope for (size_t x = 0; x < count; ++x) { - op()->Forward(opContext_, - c_.blob_input_vec_, - req, - c_.blob_output_vec_, - c_.blob_aux_states_); + op()->Forward( + opContext_, c_.blob_input_vec_, req, c_.blob_output_vec_, c_.blob_aux_states_); } } else { for (size_t x = 0; x < count; ++x) { @@ -208,10 +211,9 @@ class LegacyOperatorExecutor : public OperatorDataInitializer void backward(const size_t count = 1) { const std::vector req(c_.blob_in_grad_.size(), kWriteTo); // Possibly move data to/from CPU and GPU (outside of timing scope) - MXNET_CUDA_ONLY(std::unique_ptr gpuData(isGPU_ ? - new GPUOpData(c_, &opContext_) : nullptr)); - perf::TimingItem timeB(&OperatorExecutorTiming::GetTiming(), Backward, - "Backward", count); + MXNET_CUDA_ONLY( + std::unique_ptr gpuData(isGPU_ ? new GPUOpData(c_, &opContext_) : nullptr)); + perf::TimingItem timeB(&OperatorExecutorTiming::GetTiming(), Backward, "Backward", count); if (!isGPU_) { mxnet::profiler::vtune::VTuneResume profile; // VTune sample only this scope for (size_t x = 0; x < count; ++x) { @@ -240,25 +242,26 @@ class LegacyOperatorExecutor : public OperatorDataInitializer * \brief Test if operator has a backward pass * \return true if this operator has a backward pass */ - MSHADOW_CINLINE bool HasBackward() const { return true; } + MSHADOW_CINLINE bool HasBackward() const { + return true; + } /*! \brief Getter functions for the operator */ - inline Operator *op() { return op_.get(); } - inline const Operator *op() const { return op_.get(); } - - enum BlobVectorType { - kInput, - kOutput, - kAux, - kInGrad, - kOutGrad, - kBlobVectorTypeCount - }; + inline Operator* op() { + return op_.get(); + } + inline const Operator* op() const { + return op_.get(); + } -#define CASE_STR(__v$) case (__v$): return #__v$ + enum BlobVectorType { kInput, kOutput, kAux, kInGrad, kOutGrad, kBlobVectorTypeCount }; + +#define CASE_STR(__v$) \ + case (__v$): \ + return #__v$ /*! \brief Convert BlobVectorType enum into a string */ - static inline const char *bvt2String(const BlobVectorType bvt) { + static inline const char* bvt2String(const BlobVectorType bvt) { switch (bvt) { CASE_STR(kInput); CASE_STR(kOutput); @@ -298,11 +301,11 @@ class LegacyOperatorExecutor : public OperatorDataInitializer * After that, you can compare with the "actual" operator state (BasicOperatorData) of * the operator that you are testing. 
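   * A minimal usage sketch (an editorial illustration only; the executor set-up,
   * shapes and label below are hypothetical and not part of this patch):
   *   LegacyOperatorExecutor<float, float> exec(false, {mxnet::TShape({2, 3})});
   *   exec.initForward(prop, &in_types);  // assumes a configured 'prop' and 'in_types'
   *   exec.forward();
   *   exec.dumpC(&std::cout, "my_test");  // paste the emitted arrays, reload via load()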
*/ - template - inline void dumpC(Stream *_os, const std::string& label) { + template + inline void dumpC(Stream* _os, const std::string& label) { Stream& os = *_os; - os << "static const std::vector< std::vector< std::vector > > ___" - << label << "_data_shape_"; + os << "static const std::vector< std::vector< std::vector > > ___" << label + << "_data_shape_"; const mxnet::TShape& shape = shape_input_vec_[0]; for (size_t i = 0, n = shape.ndim(); i < n; ++i) { os << shape[i] << "_"; @@ -329,10 +332,12 @@ class LegacyOperatorExecutor : public OperatorDataInitializer os << "};" << std::endl; } - static inline void copy(const TBlob& blob, const DType array[], - const size_t start, const size_t end) { + static inline void copy(const TBlob& blob, + const DType array[], + const size_t start, + const size_t end) { const size_t blobSize = blob.Size(); - DType *p = blob.dptr(); + DType* p = blob.dptr(); for (size_t i = 0, n = end - start; i < n; ++i) { CHECK_LT(i, blobSize); p[i] = array[i + start]; @@ -342,63 +347,75 @@ class LegacyOperatorExecutor : public OperatorDataInitializer /*! \brief Runtime load of the C++ data code generated by dumpC() */ void load(const std::vector>>& cData) { for (size_t i = 0, ni = cData.size(); i < ni; ++i) { - for (size_t j = 0, nj = cData[i].size(); j < nj; ++j) { - const TBlob& blob = getBlobVect(BlobVectorType(i))[j]; + for (size_t j = 0, nj = cData[i].size(); j < nj; ++j) { + const TBlob& blob = getBlobVect(BlobVectorType(i))[j]; const size_t sourceDataSize = cData[i][j].size(); CHECK_EQ(sourceDataSize, blob.Size()); - const DType *sourceData = &cData[i][j][0]; + const DType* sourceData = &cData[i][j][0]; copy(blob, sourceData, 0, sourceDataSize); } } } /*! \brief Runtime load of the C++ data code generated by dumpC() */ - void load(const std::vector>>& cData, - const BlobVectorType type) { + void load(const std::vector>>& cData, const BlobVectorType type) { CHECK_LT(type, cData.size()); - for (size_t j = 0, nj = cData[type].size(); j < nj; ++j) { - const TBlob& blob = getBlobVect(type)[j]; + for (size_t j = 0, nj = cData[type].size(); j < nj; ++j) { + const TBlob& blob = getBlobVect(type)[j]; const size_t sourceDataSize = cData[type][j].size(); CHECK_EQ(sourceDataSize, blob.Size()); - const DType *sourceData = &cData[type][j][0]; + const DType* sourceData = &cData[type][j][0]; copy(blob, sourceData, 0, sourceDataSize); } } /*! 
\brief Runtime load of the C++ data code generated by dumpC() */ void load(const std::vector>>& cData, - const BlobVectorType type, const int idx) { + const BlobVectorType type, + const int idx) { CHECK_LT(type, cData.size()); CHECK_LT(idx, cData[type].size()); - const TBlob& blob = getBlobVect(type)[idx]; + const TBlob& blob = getBlobVect(type)[idx]; const size_t sourceDataSize = cData[type][idx].size(); CHECK_EQ(sourceDataSize, blob.Size()); - const DType *sourceData = &cData[type][idx][0]; + const DType* sourceData = &cData[type][idx][0]; copy(blob, sourceData, 0, sourceDataSize); } -// void FillRandom() { -// for (size_t j = 0, jn = this->c_.all_blob_vects_.size(); j < jn; ++j) { -// std::vector *data_vect = this->c_.all_blob_vects_[j]; -// if (data_vect) { -// for (size_t i = 0, n = data_vect->size(); i < n; ++i) { -// OperatorDataInitializer::FillRandom((*data_vect)[i]); -// } -// } -// } -// } - - std::vector& inputs() { return c_.blob_input_vec_; } - const std::vector& inputs() const { return c_.blob_input_vec_; } - std::vector& outputs() { return c_.blob_output_vec_; } - const std::vector& outputs() const { return c_.blob_output_vec_; } - std::vector& bwd_inputs() { return c_.blob_out_grad_; } - std::vector& bwd_outputs() { return c_.blob_in_grad_; } + // void FillRandom() { + // for (size_t j = 0, jn = this->c_.all_blob_vects_.size(); j < jn; ++j) { + // std::vector *data_vect = this->c_.all_blob_vects_[j]; + // if (data_vect) { + // for (size_t i = 0, n = data_vect->size(); i < n; ++i) { + // OperatorDataInitializer::FillRandom((*data_vect)[i]); + // } + // } + // } + // } + + std::vector& inputs() { + return c_.blob_input_vec_; + } + const std::vector& inputs() const { + return c_.blob_input_vec_; + } + std::vector& outputs() { + return c_.blob_output_vec_; + } + const std::vector& outputs() const { + return c_.blob_output_vec_; + } + std::vector& bwd_inputs() { + return c_.blob_out_grad_; + } + std::vector& bwd_outputs() { + return c_.blob_in_grad_; + } /*! 
\brief Input and output blobs */ - OpContext opContext_; + OpContext opContext_; - mxnet::ShapeVector shape_input_vec_; + mxnet::ShapeVector shape_input_vec_; struct OpData { std::vector blob_input_vec_; @@ -407,7 +424,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer std::vector blob_in_grad_; std::vector blob_out_grad_; // Remaining err (loss) pushing back upstream - std::vector *> all_blob_vects_; + std::vector*> all_blob_vects_; inline OpData() { all_blob_vects_.emplace_back(&blob_input_vec_); all_blob_vects_.emplace_back(&blob_output_vec_); @@ -420,31 +437,30 @@ class LegacyOperatorExecutor : public OperatorDataInitializer #if MXNET_USE_CUDA class GPUOpData : public OpData { - GPUOpData() = delete; + GPUOpData() = delete; GPUOpData(const GPUOpData& o) = delete; public: - inline GPUOpData(const OpData& cpuData, OpContext *opContext) - : cpuData_(cpuData) - , allocGPUStream_(opContext) { + inline GPUOpData(const OpData& cpuData, OpContext* opContext) + : cpuData_(cpuData), allocGPUStream_(opContext) { // Copy CPU->GPU CHECK_EQ(gpuBlobs_.size(), 0U); CHECK_EQ(cpuData_.all_blob_vects_.size(), this->all_blob_vects_.size()); for (size_t bvt = 0, nbvt = cpuData_.all_blob_vects_.size(); bvt < nbvt; ++bvt) { - std::vector& bv_src = *cpuData_.all_blob_vects_[bvt]; + std::vector& bv_src = *cpuData_.all_blob_vects_[bvt]; std::vector& bvt_dest = *this->all_blob_vects_[bvt]; for (size_t i = 0, n = bv_src.size(); i < n; ++i) { const TBlob& srcBlob = bv_src[i]; - TBlob *destBlob = allocateBlob(&gpuBlobs_, &bvt_dest, srcBlob.shape_, - true, srcBlob.type_flag_); + TBlob* destBlob = + allocateBlob(&gpuBlobs_, &bvt_dest, srcBlob.shape_, true, srcBlob.type_flag_); Context cpu_ctx, gpu_ctx; cpu_ctx.dev_type = Context::kCPU; gpu_ctx.dev_type = Context::kGPU; cpu_ctx.dev_id = gpu_ctx.dev_id = 0; - mxnet::ndarray::Copy(srcBlob, destBlob, cpu_ctx, - gpu_ctx, allocGPUStream_.opContext_.run_ctx); + mxnet::ndarray::Copy( + srcBlob, destBlob, cpu_ctx, gpu_ctx, allocGPUStream_.opContext_.run_ctx); } } cudaDeviceSynchronize(); @@ -453,19 +469,19 @@ class LegacyOperatorExecutor : public OperatorDataInitializer // Copy GPU->CPU cudaDeviceSynchronize(); for (size_t bvt = 0, nbvt = this->all_blob_vects_.size(); bvt < nbvt; ++bvt) { - std::vector& bv_src = *this->all_blob_vects_[bvt]; + std::vector& bv_src = *this->all_blob_vects_[bvt]; std::vector& bvt_dest = *cpuData_.all_blob_vects_[bvt]; for (size_t i = 0, n = bv_src.size(); i < n; ++i) { const TBlob& srcBlob = bv_src[i]; - TBlob *destBlob = &bvt_dest[i]; + TBlob* destBlob = &bvt_dest[i]; Context cpu_ctx, gpu_ctx; cpu_ctx.dev_type = Context::kCPU; gpu_ctx.dev_type = Context::kGPU; cpu_ctx.dev_id = gpu_ctx.dev_id = 0; - mxnet::ndarray::Copy(srcBlob, destBlob, gpu_ctx, - cpu_ctx, allocGPUStream_.opContext_.run_ctx); + mxnet::ndarray::Copy( + srcBlob, destBlob, gpu_ctx, cpu_ctx, allocGPUStream_.opContext_.run_ctx); } } gpuBlobs_.clear(); // Force deallocation of the GPU blob data @@ -483,7 +499,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer #endif // MXNET_USE_CUDA protected: - OpData c_; + OpData c_; /*! \brief Allocate the operator's resource requests */ void allocateResources(const std::vector& reqs) { @@ -491,7 +507,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer Context ctx; ctx.dev_type = isGPU_ ? 
Context::kGPU : Context::kCPU; - ctx.dev_id = 0; + ctx.dev_id = 0; for (const ResourceRequest& req : reqs) { switch (req.type) { @@ -513,7 +529,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer Resource rm = ResourceManager::Get()->Request(ctx, req); if (ctx.dev_mask() == Context::kCPU) { common::random::RandGenerator::AllocState( - rm.get_parallel_random()); + rm.get_parallel_random()); } opContext_.requested.emplace_back(rm); break; @@ -531,47 +547,46 @@ class LegacyOperatorExecutor : public OperatorDataInitializer } /*! \brief Locally allocate a managed TBlob and insert into the supplied vector */ - static TBlob *allocateBlob(std::list> *standalone_blobs, - std::vector *dest, + static TBlob* allocateBlob(std::list>* standalone_blobs, + std::vector* dest, const mxnet::TShape& shape, const bool isGPU, const int dtype) { - test::StandaloneBlob *blob = new test::StandaloneBlob(shape, isGPU, dtype); - CHECK_NE(blob, static_cast(nullptr)); + test::StandaloneBlob* blob = new test::StandaloneBlob(shape, isGPU, dtype); + CHECK_NE(blob, static_cast(nullptr)); standalone_blobs->emplace_back(std::unique_ptr(blob)); (*dest).emplace_back(*blob); return blob; } /*! \brief Locally allocate a managed TBlob and insert into the supplied vector */ - inline TBlob *allocateBlob(std::vector *dest, const mxnet::TShape& shape, - const bool isGPU, const int dtype) { + inline TBlob* allocateBlob(std::vector* dest, + const mxnet::TShape& shape, + const bool isGPU, + const int dtype) { return allocateBlob(&standalone_blobs_, dest, shape, isGPU, dtype); } /*! \brief Performance timing categories */ - enum TimingId { - Forward, - Backward - }; + enum TimingId { Forward, Backward }; /*! \brief The operator */ - std::unique_ptr op_; + std::unique_ptr op_; /*! \brief Is this for a GPU? */ - const bool isGPU_; + const bool isGPU_; /*! \brief Assure that the Forward initialized only once */ - std::atomic initializeForward_; + std::atomic initializeForward_; /*! \brief Assure that the Forward initialized only once */ - std::atomic initializeBackward_; + std::atomic initializeBackward_; /*! \brief Assure that the callback is initialized only once */ - std::atomic initializeCallback_; + std::atomic initializeCallback_; /*! \brief scoped lifecycle management of allocated blobs */ std::list> standalone_blobs_; }; -template +template using LegacyOpRunner = -mxnet::test::OperatorRunner>; + mxnet::test::OperatorRunner>; } // namespace op } // namespace test diff --git a/tests/cpp/include/test_ndarray_utils.h b/tests/cpp/include/test_ndarray_utils.h index 8a53298f4811..5656d2003d0a 100644 --- a/tests/cpp/include/test_ndarray_utils.h +++ b/tests/cpp/include/test_ndarray_utils.h @@ -41,8 +41,8 @@ using namespace mxnet; #define TEST_DTYPE float #define TEST_ITYPE int32_t -inline void CheckDataRegion(const TBlob &src, const TBlob &dst) { - auto size = src.shape_.Size() * mshadow::mshadow_sizeof(src.type_flag_); +inline void CheckDataRegion(const TBlob& src, const TBlob& dst) { + auto size = src.shape_.Size() * mshadow::mshadow_sizeof(src.type_flag_); auto equals = memcmp(src.dptr_, dst.dptr_, size); EXPECT_EQ(equals, 0); } @@ -55,13 +55,14 @@ inline unsigned gen_rand_seed() { inline float RandFloat() { static unsigned seed = gen_rand_seed(); - double v = rand_r(&seed) * 1.0 / RAND_MAX; + double v = rand_r(&seed) * 1.0 / RAND_MAX; return static_cast(v); } // Get an NDArray with provided indices, prepared for a RowSparse NDArray. 
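 // For illustration only (hypothetical values, not from this patch): calling
 // RspIdxND(mshadow::Shape1(3), Context::CPU(-1), {0, 2, 5}) builds the aux index
 // array that marks rows 0, 2 and 5 as the occupied rows of a RowSparse NDArray.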
-inline NDArray RspIdxND(const mxnet::TShape shape, const Context ctx, - const std::vector &values) { +inline NDArray RspIdxND(const mxnet::TShape shape, + const Context ctx, + const std::vector& values) { NDArray nd(shape, ctx, false, ROW_SPARSE_IDX_TYPE); size_t num_val = values.size(); MSHADOW_TYPE_SWITCH(nd.dtype(), DType, { @@ -92,8 +93,8 @@ inline NDArray DnsND(const mxnet::TShape shape, const Context ctx, std::vector -static void inline CopyBlob(mshadow::Stream *s, +template +static void inline CopyBlob(mshadow::Stream* s, const TBlob& dest_blob, const TBlob& src_blob) { using namespace mshadow; @@ -125,10 +126,9 @@ inline NDArray RspND(const mxnet::TShape shape, print(&std::cout, "data", data); // create result nd mxnet::ShapeVector aux_shapes = {mshadow::Shape1(num_rows)}; - NDArray nd(kRowSparseStorage, shape, ctx, false, mshadow::default_type_flag, - {}, aux_shapes); + NDArray nd(kRowSparseStorage, shape, ctx, false, mshadow::default_type_flag, {}, aux_shapes); - mshadow::Stream *s = nullptr; + mshadow::Stream* s = nullptr; CopyBlob(s, nd.aux_data(rowsparse::kIdx), index.data()); CopyBlob(s, nd.data(), data.data()); @@ -137,15 +137,15 @@ inline NDArray RspND(const mxnet::TShape shape, } /*! \brief Array - utility class to construct sparse arrays - * \warning This class is not meant to run in a production environment. Since it is for unit tests only, - * simplicity has been chosen over performance. + * \warning This class is not meant to run in a production environment. Since it is for unit tests + *only, simplicity has been chosen over performance. **/ -template +template class Array { typedef std::map > TItems; static constexpr double EPSILON = 1e-5; - static const char *st2str(const NDArrayStorageType storageType) { + static const char* st2str(const NDArrayStorageType storageType) { switch (storageType) { case kDefaultStorage: return "kDefaultStorage"; @@ -163,15 +163,13 @@ class Array { /*! \brief Remove all zero entries */ void Prune() { - for (typename TItems::iterator i = items_.begin(), e = items_.end(); - i != e;) { - const size_t y = i->first; - std::map &m = i->second; + for (typename TItems::iterator i = items_.begin(), e = items_.end(); i != e;) { + const size_t y = i->first; + std::map& m = i->second; ++i; - for (typename std::map::const_iterator j = m.begin(), jn = m.end(); - j != jn;) { + for (typename std::map::const_iterator j = m.begin(), jn = m.end(); j != jn;) { const size_t x = j->first; - const DType v = j->second; + const DType v = j->second; ++j; if (IsZero(v)) { m.erase(x); @@ -186,20 +184,19 @@ class Array { /*! 
\brief Create a dense NDArray from our mapped data */ NDArray CreateDense(const Context& ctx) const { NDArray array(shape_, Context::CPU(-1)); - TBlob data = array.data(); - DType *p_data = data.dptr(); + TBlob data = array.data(); + DType* p_data = data.dptr(); memset(p_data, 0, array.shape().Size() * sizeof(DType)); - for (typename TItems::const_iterator i = items_.begin(), e = items_.end(); - i != e; ++i) { - const size_t y = i->first; - const std::map &m = i->second; - for (typename std::map::const_iterator j = m.begin(), jn = m.end(); - j != jn; ++j) { + for (typename TItems::const_iterator i = items_.begin(), e = items_.end(); i != e; ++i) { + const size_t y = i->first; + const std::map& m = i->second; + for (typename std::map::const_iterator j = m.begin(), jn = m.end(); j != jn; + ++j) { const size_t x = j->first; - const DType v = j->second; + const DType v = j->second; if (!IsZero(v)) { const size_t offset = mxnet::test::offset(shape_, {y, x}); - p_data[offset] = v; + p_data[offset] = v; } } } @@ -215,11 +212,9 @@ class Array { public: Array() = default; - explicit Array(const mxnet::TShape &shape) - : shape_(shape) {} + explicit Array(const mxnet::TShape& shape) : shape_(shape) {} - explicit Array(const NDArray &arr) - : shape_(arr.shape()) { + explicit Array(const NDArray& arr) : shape_(arr.shape()) { Load(arr); } @@ -228,19 +223,25 @@ class Array { shape_ = mxnet::TShape(0); } - static inline bool IsNear(const DType v1, const DType v2) { return fabs(v2 - v1) <= EPSILON; } - static inline bool IsZero(const DType v) { return IsNear(v, DType(0)); } + static inline bool IsNear(const DType v1, const DType v2) { + return fabs(v2 - v1) <= EPSILON; + } + static inline bool IsZero(const DType v) { + return IsNear(v, DType(0)); + } /*! Index into value maps via: [y][x] (row, col) */ - std::map &operator[](const size_t idx) { return items_[idx]; } + std::map& operator[](const size_t idx) { + return items_[idx]; + } - const std::map &operator[](const size_t idx) const { + const std::map& operator[](const size_t idx) const { typename TItems::const_iterator i = items_.find(idx); if (i != items_.end()) { return i->second; } CHECK(false) << "Attempt to access a non-existent key in a constant map"; - return *static_cast *>(nullptr); + return *static_cast*>(nullptr); } bool Contains(const size_t row, const size_t col) const { @@ -255,12 +256,12 @@ class Array { } /*! \brief Convert from one storage type NDArray to another */ - static NDArray Convert(const Context& ctx, const NDArray& src, + static NDArray Convert(const Context& ctx, + const NDArray& src, const NDArrayStorageType storageType) { - std::unique_ptr pArray( - storageType == kDefaultStorage - ? new NDArray(src.shape(), ctx) - : new NDArray(storageType, src.shape(), ctx)); + std::unique_ptr pArray(storageType == kDefaultStorage ? 
+                                             new NDArray(src.shape(), ctx) :
+                                             new NDArray(storageType, src.shape(), ctx));
     OpContext opContext;
     MXNET_CUDA_ONLY(std::unique_ptr gpuScope;);
     switch (ctx.dev_type) {
@@ -269,7 +270,7 @@
         gpuScope.reset(new test::op::GPUStreamScope(&opContext));
         mxnet::op::CastStorageComputeImpl(s, src, dest);
         break;
-#endif // MNXNET_USE_CUDA
+#endif  // MXNET_USE_CUDA
       default: {  // CPU
         OpContext op_ctx;
         mxnet::op::CastStorageComputeImpl(op_ctx, src, *pArray);
@@ -308,7 +309,7 @@
     }
 #endif  // MXNET_USE_CUDA
     const TBlob blob = array.data();
-    DType *p = blob.dptr();
+    DType* p         = blob.dptr();
     CHECK_EQ(shape_.ndim(), 2U);
     for (size_t row = 0, nrow = shape_[0]; row < nrow; ++row) {
       for (size_t col = 0, ncol = shape_[1]; col < ncol; ++col) {
@@ -321,15 +322,14 @@
   }

   void print() const {
-    for (typename TItems::const_iterator i = items_.begin(), e = items_.end();
-         i != e; ++i) {
-      const size_t y = i->first;
-      const std::map &m = i->second;
+    for (typename TItems::const_iterator i = items_.begin(), e = items_.end(); i != e; ++i) {
+      const size_t y = i->first;
+      const std::map& m = i->second;
       CHECK_EQ(m.empty(), false);  // How did it get to have an empty map?
-      for (typename std::map::const_iterator j = m.begin(), jn = m.end();
-           j != jn; ++j) {
+      for (typename std::map::const_iterator j = m.begin(), jn = m.end(); j != jn;
+           ++j) {
         const size_t x = j->first;
-        const DType v = j->second;
+        const DType v  = j->second;
         if (!IsZero(v)) {
           std::cout << "[row=" << y << ", col=" << x << "]: " << v << std::endl;
         }
@@ -343,11 +343,10 @@
   TItems items_;
 };

-template
-inline StreamType& print_dense(StreamType *_os, const std::string& label, const NDArray& arr) {
+template
+inline StreamType& print_dense(StreamType* _os, const std::string& label, const NDArray& arr) {
   MSHADOW_TYPE_SWITCH(arr.data().type_flag_, DType, {
-    print(_os, label, test::Array(arr).Save(arr.ctx(), kDefaultStorage))
-        << std::endl;
+    print(_os, label, test::Array(arr).Save(arr.ctx(), kDefaultStorage)) << std::endl;
   });
   return *_os;
 }
diff --git a/tests/cpp/include/test_op.h b/tests/cpp/include/test_op.h
index c80255d72f20..141c5975f993 100644
--- a/tests/cpp/include/test_op.h
+++ b/tests/cpp/include/test_op.h
@@ -67,17 +67,14 @@ namespace op {
  * \brief Maintain the lifecycle of a GPU stream
  */
 struct GPUStreamScope {
-  explicit inline GPUStreamScope(OpContext *opContext)
-    : opContext_(*opContext) {
-    CHECK_EQ(opContext_.run_ctx.stream == nullptr, true)
-      << "Invalid runtime context stream state";
+  explicit inline GPUStreamScope(OpContext* opContext) : opContext_(*opContext) {
+    CHECK_EQ(opContext_.run_ctx.stream == nullptr, true) << "Invalid runtime context stream state";
     opContext_.run_ctx.stream = mshadow::NewStream(true, true, opContext_.run_ctx.ctx.dev_id);
-    CHECK_EQ(opContext_.run_ctx.stream != nullptr, true)
-      << "Unable to allocate a GPU stream";
+    CHECK_EQ(opContext_.run_ctx.stream != nullptr, true) << "Unable to allocate a GPU stream";
   }
   inline ~GPUStreamScope() {
     if (opContext_.run_ctx.stream) {
-      mshadow::DeleteStream(static_cast *>(opContext_.run_ctx.stream));
+      mshadow::DeleteStream(static_cast*>(opContext_.run_ctx.stream));
       opContext_.run_ctx.stream = nullptr;
     }
   }
@@ -88,12 +85,10 @@ struct GPUStreamScope {
 /*!
* \brief Base class for operator test-data classes */ -template +template class OperatorDataInitializer { public: - OperatorDataInitializer() - : generator_(new std::mt19937()) { - } + OperatorDataInitializer() : generator_(new std::mt19937()) {} virtual ~OperatorDataInitializer() {} /*! @@ -132,7 +127,9 @@ class OperatorDataInitializer { * \brief mt19937 generator for random number generator * \return reference to mt19937 generator object */ - std::mt19937& generator() const { return *generator_; } + std::mt19937& generator() const { + return *generator_; + } /*! \brief Per-test generator */ std::unique_ptr generator_; @@ -140,7 +137,9 @@ class OperatorDataInitializer { class OperatorExecutorTiming { public: - inline test::perf::TimingInstrument& GetTiming() { return timing_; } + inline test::perf::TimingInstrument& GetTiming() { + return timing_; + } private: /*! Timing instrumentation */ @@ -148,10 +147,10 @@ class OperatorExecutorTiming { }; /*! \brief Top-level operator test state info structure */ -template +template struct OpInfo { /*! \brief The operator data */ - std::shared_ptr< OperatorExecutor > executor_; + std::shared_ptr executor_; /*! \brief The operator prop class */ std::shared_ptr prop_; /*! \brief The input type(s) */ @@ -159,16 +158,16 @@ struct OpInfo { }; /*! \brief Pair of op info objects, generally for validating ops against each other */ -template +template struct OpInfoPair { /*! \brief Operator item 1 */ - test::op::OpInfo info_1_; + test::op::OpInfo info_1_; /*! \brief Operator item 2 */ - test::op::OpInfo info_2_; + test::op::OpInfo info_2_; }; /*! \brief Base validator class for validating test data */ -template +template class Validator { public: static inline DType ERROR_BOUND() { @@ -180,10 +179,10 @@ class Validator { } } - static inline DType ErrorBound(const TBlob *blob) { + static inline DType ErrorBound(const TBlob* blob) { // Due to eps, for a small number of entries, the error will be a bit higher for one pass if (blob->shape_.ndim() >= 3) { - if (blob->Size() / blob->shape_[1] <=4) { + if (blob->Size() / blob->shape_[1] <= 4) { return ERROR_BOUND() * 15; } else { return ERROR_BOUND(); @@ -195,11 +194,11 @@ class Validator { } /*! \brief Adjusted error based upon significant digits */ - template - static inline DType ErrorBound(const TBlob *blob, const DTypeX v1, const DTypeX v2) { + template + static inline DType ErrorBound(const TBlob* blob, const DTypeX v1, const DTypeX v2) { const DType initialErrorBound = ErrorBound(blob); DType kErrorBound = initialErrorBound; // This error is based upon the range [0.1x, 0.9x] - DTypeX avg = static_cast((fabs(v1) + fabs(v2)) / 2); + DTypeX avg = static_cast((fabs(v1) + fabs(v2)) / 2); if (avg >= 1) { uint64_t vv = static_cast(avg + 0.5); do { @@ -209,19 +208,21 @@ class Validator { return kErrorBound; } - template + template static bool isNear(const DTypeX v1, const DTypeX v2, const AccReal error) { return error >= fabs(v2 - v1); } /*! 
\brief Convenient setpoint for macro-expanded failures */ - template - static void on_failure(const size_t i, const size_t n, - const Type1 v1, const Type1 v2, const Type2 kErrorBound) { - LOG(WARNING) - << "Near test failure: at i = " << i << ", n = " - << n << ", kErrorBound = " << kErrorBound << std::endl - << std::flush; + template + static void on_failure(const size_t i, + const size_t n, + const Type1 v1, + const Type1 v2, + const Type2 kErrorBound) { + LOG(WARNING) << "Near test failure: at i = " << i << ", n = " << n + << ", kErrorBound = " << kErrorBound << std::endl + << std::flush; } /*! \brief Compare blob data */ @@ -229,12 +230,12 @@ class Validator { if (b1.shape_ == b2.shape_) { CHECK_EQ(b1.type_flag_, b2.type_flag_) << "Can't compare blobs of different data types"; MSHADOW_REAL_TYPE_SWITCH(b1.type_flag_, DTypeX, { - const DTypeX *d1 = b1.dptr(); - const DTypeX *d2 = b2.dptr(); + const DTypeX* d1 = b1.dptr(); + const DTypeX* d2 = b2.dptr(); CHECK_NE(d1, d2); // don't compare the same memory for (size_t i = 0, n = b1.Size(), warningCount = 0; i < n; ++i) { - const DTypeX v1 = *d1++; - const DTypeX v2 = *d2++; + const DTypeX v1 = *d1++; + const DTypeX v2 = *d2++; const DType kErrorBound = ErrorBound(&b1, v1, v2); EXPECT_NEAR(v1, v2, kErrorBound); if (!isNear(v1, v2, kErrorBound) && !warningCount++) { @@ -249,9 +250,9 @@ class Validator { } /*! \brief Compare blob data to a pointer to data */ - template - static bool compare(const TBlob& b1, const DTypeX *valuePtr) { - const DTypeX *d1 = b1.dptr(); + template + static bool compare(const TBlob& b1, const DTypeX* valuePtr) { + const DTypeX* d1 = b1.dptr(); CHECK_NE(d1, valuePtr); // don't compare the same memory const DType kErrorBound = ErrorBound(&b1); for (size_t i = 0, n = b1.Size(), warningCount = 0; i < n; ++i) { @@ -270,16 +271,13 @@ class Validator { typedef std::vector > kwargs_t; /*! \brief Create operator data, prop, the operator itself and init default forward input */ -template< - typename OperatorProp, - typename OperatorExecutor, - typename ...Args> -static test::op::OpInfo createOpAndInfoF(const kwargs_t &kwargs, +template +static test::op::OpInfo createOpAndInfoF(const kwargs_t& kwargs, Args... 
args) {
   test::op::OpInfo info;
   info.executor_ = std::make_shared(args...);
-  info.prop_ = std::make_shared();
-  info.in_type_ = { mshadow::DataType::kFlag };
+  info.prop_     = std::make_shared();
+  info.in_type_  = {mshadow::DataType::kFlag};
   info.prop_->Init(kwargs);
   info.executor_->initForward(*info.prop_, &info.in_type_);
   return info;
diff --git a/tests/cpp/include/test_op_runner.h b/tests/cpp/include/test_op_runner.h
index b46065bb5cdb..bf641ca24ba4 100644
--- a/tests/cpp/include/test_op_runner.h
+++ b/tests/cpp/include/test_op_runner.h
@@ -21,7 +21,7 @@
  * \file test_op_runner.h
  * \brief Run a generic operator
  * \author Chris Olivier
-*/
+ */
 #ifndef TEST_OP_RUNNER_H_
 #define TEST_OP_RUNNER_H_

@@ -39,10 +39,10 @@ namespace test {
  * \tparam OperatorExecutor Data container for forward and backward passes for some given
  *         data types
 */
-template
+template
 class OperatorRunner {
  public:
-  typedef typename OperatorExecutor::DataType DType;
+  typedef typename OperatorExecutor::DataType DType;

   OperatorRunner() {
 #ifdef NDEBUG
@@ -61,21 +61,20 @@ class OperatorRunner {
    * \param count Number of times to run in each direction
    * \return OpInfo object for further operator analysis
    */
-  test::op::OpInfo
-  RunGenericOperatorForward(
-      bool isGPU,
-      const mxnet::ShapeVector& inputShapes,
-      const std::vector > &kwargs,
-      const size_t count = 1) {
+  test::op::OpInfo RunGenericOperatorForward(
+      bool isGPU,
+      const mxnet::ShapeVector& inputShapes,
+      const std::vector >& kwargs,
+      const size_t count = 1) {
 #if MXNET_USE_CUDA
     if (isGPU && !test::unitTestsWithCuda) {
       LOG(INFO) << "GPU not found, running test as non-GPU";
     }
 #else
-    isGPU = false;
+    isGPU = false;
 #endif
     test::op::OpInfo info =
-        test::op::createOpAndInfoF(kwargs, isGPU, inputShapes);
+        test::op::createOpAndInfoF(kwargs, isGPU, inputShapes);
     info.executor_->initForward(*info.prop_, &info.in_type_);
     info.executor_->forward(count);
     return info;
@@ -88,8 +87,8 @@ class OperatorRunner {
    * \return OpInfo object for further operator analysis
    */
   test::op::OpInfo RunGenericOperatorBackward(
-      test::op::OpInfo *info,
-      const size_t count = 1) {
+      test::op::OpInfo* info,
+      const size_t count = 1) {
     CHECK(info->executor_->HasBackward());
     info->executor_->initBackward(*info->prop_, &info->in_type_);
     info->executor_->backward(count);
@@ -106,12 +105,12 @@ class OperatorRunner {
    * \return
    */
   test::op::OpInfo RunBidirectional(
-      bool isGPU,
-      const mxnet::ShapeVector& inputShapes,
-      const std::vector > &kwargs,
-      const size_t count = 1) {
+      bool isGPU,
+      const mxnet::ShapeVector& inputShapes,
+      const std::vector >& kwargs,
+      const size_t count = 1) {
     test::op::OpInfo info =
-        RunGenericOperatorForward(isGPU, inputShapes, kwargs, count);
+        RunGenericOperatorForward(isGPU, inputShapes, kwargs, count);
     if (info.executor_->HasBackward()) {
       return RunGenericOperatorBackward(&info, count);
     }
@@ -130,18 +129,18 @@ class OperatorRunner {
    * \param dim Data dimensions
    * \param count Number of times to run in each direction
    */
-  std::unordered_map
-  TimingTest(const std::string& label,
-             const bool isGPU,
-             const bool stochastic,
-             const test::op::kwargs_t& kwargs,
-             int dim = 0,
-             size_t count = 1,
-             const mxnet::ShapeVector& timing_shapes = {},
-             bool backward = true) {
+  std::unordered_map TimingTest(
+      const std::string& label,
+      const bool isGPU,
+      const bool stochastic,
+      const test::op::kwargs_t& kwargs,
+      int dim = 0,
+      size_t count = 1,
+      const mxnet::ShapeVector& timing_shapes = {},
+      bool backward = true) {
     if (mxnet::test::quick_test) {
      total_iterations_ = 
2; - count = 1; + count = 1; } test::perf::TimingInstrument timing; @@ -168,18 +167,18 @@ class OperatorRunner { for (size_t i = 0; i < total_iterations_; ++i) { index_t batchSize = 1; - index_t channels = 1; - index_t depth = 1; - index_t height = 1; - index_t width = 1; + index_t channels = 1; + index_t depth = 1; + index_t height = 1; + index_t width = 1; if (timing_shapes.empty()) { do { batchSize = stochastic ? test::rangedRand(1U, TEST_BATCH_SIZE * 2U) : TIMING_BATCH_SIZE; - channels = stochastic ? test::rangedRand(1U, TEST_CHANNELS * 2U) : TIMING_CHANNELS; - depth = stochastic ? test::rangedRand(1U, TEST_DEPTH * 2U) : TIMING_DEPTH; - height = stochastic ? test::rangedRand(1U, TEST_DH * 2U) : TIMING_DH; - width = stochastic ? test::rangedRand(1U, TEST_DW * 2U) : TIMING_DW; + channels = stochastic ? test::rangedRand(1U, TEST_CHANNELS * 2U) : TIMING_CHANNELS; + depth = stochastic ? test::rangedRand(1U, TEST_DEPTH * 2U) : TIMING_DEPTH; + height = stochastic ? test::rangedRand(1U, TEST_DH * 2U) : TIMING_DH; + width = stochastic ? test::rangedRand(1U, TEST_DW * 2U) : TIMING_DW; } while (stochastic && (height * width) == 1U); } else { dim = timing_shapes[0].ndim() - 1; @@ -190,37 +189,31 @@ class OperatorRunner { test::op::OpInfo info; switch (D) { case 0: - info = RunGenericOperatorForward(isGPU, - !timing_shapes.empty() - ? timing_shapes - : mxnet::ShapeVector({mxnet::TShape({batchSize, - channels, - width})}), - kwargs, - count); + info = RunGenericOperatorForward( + isGPU, + !timing_shapes.empty() ? + timing_shapes : + mxnet::ShapeVector({mxnet::TShape({batchSize, channels, width})}), + kwargs, + count); break; case 1: - info = RunGenericOperatorForward(isGPU, - !timing_shapes.empty() - ? timing_shapes - : mxnet::ShapeVector({ mxnet::TShape({batchSize, - channels, - height, - width})}), - kwargs, - count); + info = RunGenericOperatorForward( + isGPU, + !timing_shapes.empty() ? + timing_shapes : + mxnet::ShapeVector({mxnet::TShape({batchSize, channels, height, width})}), + kwargs, + count); break; case 2: - info = RunGenericOperatorForward(isGPU, - !timing_shapes.empty() - ? timing_shapes - : mxnet::ShapeVector({ mxnet::TShape({batchSize, - channels, - depth, - height, - width})}), - kwargs, - count); + info = RunGenericOperatorForward( + isGPU, + !timing_shapes.empty() ? 
+                timing_shapes :
+                mxnet::ShapeVector({mxnet::TShape({batchSize, channels, depth, height, width})}),
+            kwargs,
+            count);
          break;
        default:
          CHECK(false) << "Unsupported dimension count: " << (D + 1);
@@ -240,22 +233,26 @@
     return timing.data();
   }

-  void set_verbose(bool verbose) { verbose_ = verbose; }
+  void set_verbose(bool verbose) {
+    verbose_ = verbose;
+  }

-  void set_total_iterations(size_t iterations) { total_iterations_ = iterations; }
+  void set_total_iterations(size_t iterations) {
+    total_iterations_ = iterations;
+  }

  protected:
   static constexpr int TEST_BATCH_SIZE = 5;
-  static constexpr int TEST_CHANNELS = 3;
-  static constexpr int TEST_DEPTH = 2;
-  static constexpr int TEST_DH = 2;
-  static constexpr int TEST_DW = 3;
+  static constexpr int TEST_CHANNELS   = 3;
+  static constexpr int TEST_DEPTH      = 2;
+  static constexpr int TEST_DH         = 2;
+  static constexpr int TEST_DW         = 3;

   static constexpr int TIMING_BATCH_SIZE = 128;
-  static constexpr int TIMING_CHANNELS = 3;
-  static constexpr int TIMING_DEPTH = 2;
-  static constexpr int TIMING_DH = 64;
-  static constexpr int TIMING_DW = 64;
+  static constexpr int TIMING_CHANNELS   = 3;
+  static constexpr int TIMING_DEPTH      = 2;
+  static constexpr int TIMING_DH         = 64;
+  static constexpr int TIMING_DW         = 64;

   /*! \brief verbose output */
   bool verbose_ = true;
   /*! \brief Total iterations */
diff --git a/tests/cpp/include/test_perf.h b/tests/cpp/include/test_perf.h
index 2daee316da12..2f215b5f68ee 100644
--- a/tests/cpp/include/test_perf.h
+++ b/tests/cpp/include/test_perf.h
@@ -21,7 +21,7 @@
  * \file test_perf.h
  * \brief operator unit test utility functions
  * \author Chris Olivier
-*/
+ */
 #ifndef TEST_PERF_H_
 #define TEST_PERF_H_

@@ -83,35 +83,32 @@ inline uint64_t getNannoTickCount() {
 #endif
 }

-#define MICRO2MS(__micro$) (((__micro$) + 500)/1000)
-#define MICRO2MSF(__micro$) (static_cast(__micro$)/1000)
-#define MICRO2MSF(__micro$) (static_cast(__micro$)/1000)
-#define MS2MICRO(__ms$) ((__ms$) * 1000)
-#define NANO2MSF(__nano$) (static_cast(__nano$)/1000000)
-#define MICRO2S(__micro$) (((__micro$) + 500000)/1000000)
-#define MICRO2SF(__micro$) (MICRO2MSF(__micro$)/1000)
+#define MICRO2MS(__micro$) (((__micro$) + 500) / 1000)
+#define MICRO2MSF(__micro$) (static_cast(__micro$) / 1000)
+#define MICRO2MSF(__micro$) (static_cast(__micro$) / 1000)
+#define MS2MICRO(__ms$) ((__ms$)*1000)
+#define NANO2MSF(__nano$) (static_cast(__nano$) / 1000000)
+#define MICRO2S(__micro$) (((__micro$) + 500000) / 1000000)
+#define MICRO2SF(__micro$) (MICRO2MSF(__micro$) / 1000)

 /*! \brief Calculate time between construction and destruction */
 class TimedScope {
-  std::string label_;
-  uint64_t startTime_;
-  uint64_t stopTime_;
-  const size_t count_;
+  std::string label_;
+  uint64_t startTime_;
+  uint64_t stopTime_;
+  const size_t count_;

  public:
-  explicit inline TimedScope(const char *msg = nullptr, size_t count = 1, const bool start = true)
-    : startTime_(start ? getMicroTickCount() : 0)
-    , stopTime_(0)
-    , count_(count) {
+  explicit inline TimedScope(const char* msg = nullptr, size_t count = 1, const bool start = true)
+      : startTime_(start ? getMicroTickCount() : 0), stopTime_(0), count_(count) {
     CHECK_NE(count, 0U);
     if (msg && *msg) {
       label_ = msg;
     }
   }

-  explicit inline TimedScope(const std::string &msg, size_t count = 1, const bool start = true)
-    : startTime_(start ? getMicroTickCount() : 0)
-    , count_(count) {
+  explicit inline TimedScope(const std::string& msg, size_t count = 1, const bool start = true)
+      : startTime_(start ? 
getMicroTickCount() : 0), count_(count) { CHECK_NE(count, 0U); if (!msg.empty()) { label_ = msg; @@ -127,7 +124,8 @@ class TimedScope { } inline void stop() { - stopTime_ = getMicroTickCount();; + stopTime_ = getMicroTickCount(); + ; } inline float elapsedMilliseconds() const { @@ -145,8 +143,7 @@ class TimedScope { if (!label_.empty()) { ss << label_ << " "; } - ss << "elapsed time: " - << std::setprecision(4) << std::fixed << MICRO2MSF(diff) << " ms"; + ss << "elapsed time: " << std::setprecision(4) << std::fixed << MICRO2MSF(diff) << " ms"; if (count_ != 0 && count_ != 1) { const float microSecondsEach = static_cast(diff) / count_; ss << " ( " << MICRO2MSF(microSecondsEach) << " ms each )"; @@ -158,10 +155,8 @@ class TimedScope { /*! \brief Accumulate separate timing values mapped by label/id -> total time spent */ class TimingInstrument { public: - explicit TimingInstrument(const char *name = "") - : name_(name) { - } - void startTiming(int id, const char *s) { + explicit TimingInstrument(const char* name = "") : name_(name) {} + void startTiming(int id, const char* s) { std::unique_lock lk(mutex_); auto i = data_.find(id); if (i == data_.end()) { @@ -189,11 +184,11 @@ class TimingInstrument { std::unique_lock lk(mutex_); auto i = data_.find(id); if (i != data_.end()) { - const Info &info = i->second; - const uint64_t duration = info.nestingCount_.load() - ? info.duration_.load() + - (getMicroTickCount() - info.baseTime_.load()) - : info.duration_.load(); + const Info& info = i->second; + const uint64_t duration = + info.nestingCount_.load() ? + info.duration_.load() + (getMicroTickCount() - info.baseTime_.load()) : + info.duration_.load(); return duration; } return 0; @@ -206,21 +201,19 @@ class TimingInstrument { return false; } - template - void print(StreamType *os, const std::string &label_, bool doReset = false) { + template + void print(StreamType* os, const std::string& label_, bool doReset = false) { std::unique_lock lk(mutex_); // Sorted output std::map data(data_.begin(), data_.end()); - for (std::map::const_iterator i = data.begin(), e = data.end(); - i != e; ++i) { - const Info &info = i->second; + for (std::map::const_iterator i = data.begin(), e = data.end(); i != e; ++i) { + const Info& info = i->second; const uint64_t duration = getDuration(i->first); *os << label_ << ": " << name_ << " Timing [" << info.name_ << "] " - << (info.nestingCount_.load() ? "*" : "") - << MICRO2MSF(duration) << " ms"; + << (info.nestingCount_.load() ? 
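Aside: the MICRO2MS family of macros above converts microseconds to milliseconds with round-to-nearest rather than truncation, by adding half the divisor before the integer division. A minimal standalone sketch of the same idiom (plain C++, no mxnet headers; the function name is ours):

#include <cstdint>
#include <iostream>

// Round-to-nearest integer division: adding half the divisor before
// dividing rounds 1500 us up to 2 ms instead of truncating to 1 ms.
static inline uint64_t micro_to_ms(uint64_t micro) {
  return (micro + 500) / 1000;
}

int main() {
  std::cout << micro_to_ms(1499) << "\n";  // 1
  std::cout << micro_to_ms(1500) << "\n";  // 2 (plain division would give 1)
}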
"*" : "") << MICRO2MSF(duration) << " ms"; if (info.cycleCount_.load()) { - *os << ", avg: " << (MICRO2MSF(duration) / info.cycleCount_) - << " ms X " << info.cycleCount_ << " passes"; + *os << ", avg: " << (MICRO2MSF(duration) / info.cycleCount_) << " ms X " << info.cycleCount_ + << " passes"; } *os << std::endl; } @@ -232,9 +225,8 @@ class TimingInstrument { void reset() { std::unique_lock lk(mutex_); - for (auto i = data_.begin(), e = data_.end(); - i != e; ++i) { - const int id = i->first; + for (auto i = data_.begin(), e = data_.end(); i != e; ++i) { + const int id = i->first; const bool wasTiming = isTiming(id); if (wasTiming) { stopTiming(id); @@ -248,12 +240,11 @@ class TimingInstrument { } } - TimingInstrument &operator+=(const TimingInstrument &o) { - for (auto i = o.data_.begin(), e = o.data_.end(); - i != e; ++i) { + TimingInstrument& operator+=(const TimingInstrument& o) { + for (auto i = o.data_.begin(), e = o.data_.end(); i != e; ++i) { auto j = data_.find(i->first); if (j != data_.end()) { - const Info &oInfo = i->second; + const Info& oInfo = i->second; CHECK_EQ(oInfo.nestingCount_, 0U); j->second.duration_ += oInfo.duration_; j->second.cycleCount_ += oInfo.cycleCount_; @@ -265,23 +256,19 @@ class TimingInstrument { } struct Info { - explicit inline Info(const char *s) - : name_(s ? s : "") - , baseTime_(0) - , nestingCount_(0) - , cycleCount_(0) - , duration_(0) {} + explicit inline Info(const char* s) + : name_(s ? s : ""), baseTime_(0), nestingCount_(0), cycleCount_(0), duration_(0) {} inline Info(const Info& o) - : name_(o.name_) - , baseTime_(o.baseTime_.load()) - , nestingCount_(o.nestingCount_.load()) - , cycleCount_(o.cycleCount_.load()) - , duration_(o.duration_.load()) { + : name_(o.name_), + baseTime_(o.baseTime_.load()), + nestingCount_(o.nestingCount_.load()), + cycleCount_(o.cycleCount_.load()), + duration_(o.duration_.load()) { CHECK_EQ(o.nestingCount_, 0U); } - inline Info& operator = (const Info& o) { + inline Info& operator=(const Info& o) { name_ = o.name_; baseTime_.store(baseTime_.load()); nestingCount_.store(nestingCount_.load()); @@ -298,7 +285,7 @@ class TimingInstrument { return static_cast(duration_) / cycleCount_.load() / 1000.0f; } - std::string name_; + std::string name_; std::atomic baseTime_; std::atomic nestingCount_; std::atomic cycleCount_; // Note that nesting may skew averages @@ -307,7 +294,7 @@ class TimingInstrument { typedef std::unordered_map timing_map_t; - const timing_map_t &data() const { + const timing_map_t& data() const { return data_; } @@ -322,13 +309,11 @@ using timing_map_t = TimingInstrument::timing_map_t; /*! 
\brief Accumulated scoped timing, indexed by ID */ class TimingItem { public: - inline TimingItem(TimingInstrument *ti, + inline TimingItem(TimingInstrument* ti, int id, - const char *name, + const char* name, const size_t subIterationCount = 1) - : ti_(ti) - , id_(id) - , subIterationCount_(subIterationCount) { + : ti_(ti), id_(id), subIterationCount_(subIterationCount) { if (ti_) { ti_->startTiming(id, name); } @@ -340,12 +325,11 @@ class TimingItem { } private: - TimingInstrument *ti_; + TimingInstrument* ti_; const int id_; const size_t subIterationCount_; }; - } // namespace perf } // namespace test } // namespace mxnet diff --git a/tests/cpp/include/test_tune.h b/tests/cpp/include/test_tune.h index 9f5a2e04c54e..3b2310f68fa5 100644 --- a/tests/cpp/include/test_tune.h +++ b/tests/cpp/include/test_tune.h @@ -21,7 +21,7 @@ * \file test_tune.h * \brief operator tuning tester * \author Chris Olivier -*/ + */ #ifndef TEST_TUNE_H_ #define TEST_TUNE_H_ @@ -60,19 +60,19 @@ namespace tune { * trunk unless you've verified the performance characteristics for that chunk of code * \tparam DType Data type to test */ -template +template class TuningTester { public: using kwargs_t = test::op::kwargs_t; using bool_mode_pair = std::pair; - using shape_vect = mxnet::ShapeVector; + using shape_vect = mxnet::ShapeVector; using shape_vec_to_bool_map = std::map; private: using ShapesToPerfTimingMap = - std::map; + std::map; /*! * \brief Run timing test on various data shapes and sizes @@ -83,13 +83,13 @@ class TuningTester { * \return ShapesToPerfTimingMap map holsing timing data for shapes */ ShapesToPerfTimingMap RunCoreOpTimingTest(const bool isGPU, - const kwargs_t &op_kwargs, + const kwargs_t& op_kwargs, const std::vector& shapes, - const char *op_name, - const char *backward_op_name = "") { + const char* op_name, + const char* backward_op_name = "") { ShapesToPerfTimingMap res; - const kwargs_t kwargs = test::op::CoreOpExecutor::ArgsWithOpName( - op_kwargs, op_name, backward_op_name); + const kwargs_t kwargs = + test::op::CoreOpExecutor::ArgsWithOpName(op_kwargs, op_name, backward_op_name); // prime code and cache before the performance runs test::op::CoreOperatorRunner runner; @@ -98,11 +98,14 @@ class TuningTester { runner.RunBidirectional(false, {{10, 3, 18, 128}}, kwargs, 1); // Do the performance runs - const char *pu = isGPU ? "GPU" : "CPU"; - for (const mxnet::ShapeVector &this_run_shapes : shapes) { + const char* pu = isGPU ? "GPU" : "CPU"; + for (const mxnet::ShapeVector& this_run_shapes : shapes) { test::perf::timing_map_t tmap = runner.TimingTest(std::string(op_name) + " Operator " + pu, - isGPU, false, kwargs, - 0, calls_per_iteration_, + isGPU, + false, + kwargs, + 0, + calls_per_iteration_, this_run_shapes); CHECK(res.find(this_run_shapes) == res.end()); res[this_run_shapes] = tmap; @@ -110,9 +113,9 @@ class TuningTester { return res; } - using tuned_timing_t = std::map< - shape_vect, - std::map<::mxnet::op::tune::TuningMode, test::perf::timing_map_t>, test::less_shapevect>; + using tuned_timing_t = std::map, + test::less_shapevect>; using modesort_t = std::multimap; @@ -125,7 +128,7 @@ class TuningTester { * have made the correct decision, and the TuningMode which was closest in timing to * the Auto mode. 
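Aside: TimingItem above is the RAII half of the pair: constructing one starts a named timer and its destructor stops it, so a block is timed by a single local declaration. A stripped-down sketch of the same pattern with std::chrono (class and label names are ours):

#include <chrono>
#include <iostream>
#include <string>
#include <utility>

// Timing starts in the constructor and stops in the destructor, so the
// lifetime of the local object is exactly the span being measured.
class ScopedTimer {
  std::string label_;
  std::chrono::steady_clock::time_point start_;

 public:
  explicit ScopedTimer(std::string label)
      : label_(std::move(label)), start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_).count();
    std::cout << label_ << ": " << us << " us\n";
  }
};

int main() {
  ScopedTimer t("busy loop");
  volatile long sink = 0;
  for (long i = 0; i < 1000000; ++i) sink += i;
}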
*/ - static bool_mode_pair CheckCorrectTuning(const modesort_t &mode_sort, + static bool_mode_pair CheckCorrectTuning(const modesort_t& mode_sort, const double closeness_factor = 0.25) { CHECK_EQ(mode_sort.size(), 3U); @@ -145,9 +148,9 @@ class TuningTester { for (auto i = mode_sort.begin(), e = mode_sort.end(); i != e; ++i) { mode2time[i->second] = i->first; } - const double time_auto = mode2time[::mxnet::op::tune::kAuto]; + const double time_auto = mode2time[::mxnet::op::tune::kAuto]; const double time_no_omp = mode2time[::mxnet::op::tune::kNeverOMP]; - const double time_omp = mode2time[::mxnet::op::tune::kAlwaysOMP]; + const double time_omp = mode2time[::mxnet::op::tune::kAlwaysOMP]; // Figure out which one we are closest to and return that to help in the analysis ::mxnet::op::tune::TuningMode closest_to; @@ -160,11 +163,10 @@ class TuningTester { // If difference between OMP and no OMP is < closeness_factor of largest of the two, // then we just want to make sure we are close to both of these const double fastest_standard_time = std::min(time_no_omp, time_omp); - const double allowed_difference = closeness_factor * fastest_standard_time; - const double mustbe_asfast = fastest_standard_time + allowed_difference; + const double allowed_difference = closeness_factor * fastest_standard_time; + const double mustbe_asfast = fastest_standard_time + allowed_difference; - return { time_auto <= mustbe_asfast || closest_to == fastest_standard_mode, - closest_to }; + return {time_auto <= mustbe_asfast || closest_to == fastest_standard_mode, closest_to}; } public: @@ -183,38 +185,37 @@ class TuningTester { } shape_vec_to_bool_map results; // Incredibly inefficient method of grouping the results - for (const auto &i : timing_) { + for (const auto& i : timing_) { // print shapes - const shape_vect &shapes = i.first; + const shape_vect& shapes = i.first; if (verbose || test::csv) { if (!test::csv) { for (size_t x = 0, n = shapes.size(); x < n; ++x) { - const mxnet::TShape &shape = shapes[x]; + const mxnet::TShape& shape = shapes[x]; if (x) { std::cout << ", "; } std::cout << shape; } - const mxnet::TShape &lhs_shape = shapes[0]; + const mxnet::TShape& lhs_shape = shapes[0]; std::cout << " lhs=" << test::pretty_num(lhs_shape.Size()) << " items"; std::cout << "\t(" << TimingDirectionAsString(direction) << ")" << std::endl; } else { std::cout << test::pretty_num(shapes[0].Size()) << ","; } } - const auto &mode2timing = i.second; + const auto& mode2timing = i.second; modesort_t mode_sort; - for (const auto &j : mode2timing) { + for (const auto& j : mode2timing) { const ::mxnet::op::tune::TuningMode mode = j.first; - const test::perf::timing_map_t &tm = j.second; + const test::perf::timing_map_t& tm = j.second; if (tm.find(direction) != tm.end()) { - const test::perf::TimingInstrument::Info &info = tm.find(direction)->second; - double duration = info.TimeEach(); + const test::perf::TimingInstrument::Info& info = tm.find(direction)->second; + double duration = info.TimeEach(); mode_sort.insert({duration, mode}); if (test::csv) { std::cout << TimingDirectionAsString(direction) << "," - << ::mxnet::op::tune::TuningModeToString(mode) << "," - << duration << ","; + << ::mxnet::op::tune::TuningModeToString(mode) << "," << duration << ","; } } } @@ -225,9 +226,9 @@ class TuningTester { // Now we have modes sorted by performance, fastest to slowest const bool_mode_pair result = CheckCorrectTuning(mode_sort); if (verbose && !test::csv) { - for (const auto &k : mode_sort) { - std::cout << "\t" << 
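Aside: the acceptance rule in CheckCorrectTuning above reduces to simple arithmetic: Auto passes if its time is within closeness_factor of the faster of the two fixed modes. A worked sketch with made-up timings:

#include <algorithm>
#include <iostream>

// Times in milliseconds; the values are invented for illustration.
int main() {
  const double time_auto = 1.15, time_no_omp = 1.1, time_omp = 2.3;
  const double closeness_factor = 0.25;
  const double fastest_standard = std::min(time_no_omp, time_omp);          // 1.1
  const double must_be_as_fast = fastest_standard * (1.0 + closeness_factor);  // 1.375
  std::cout << (time_auto <= must_be_as_fast ? "tuning OK" : "tuning suspect") << "\n";
}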
::mxnet::op::tune::TuningModeToString(k.second) - << ": " << k.first << " ms"; + for (const auto& k : mode_sort) { + std::cout << "\t" << ::mxnet::op::tune::TuningModeToString(k.second) << ": " << k.first + << " ms"; if (k.second == ::mxnet::op::tune::kAuto) { std::cout << " (" << ::mxnet::op::tune::TuningModeToString(result.second) << ")"; } @@ -251,34 +252,31 @@ class TuningTester { * \param op_name Name by which the operator is registered with nnvm * \param backward_op_name Backward operator name */ - void TestTunedOperator(const kwargs_t &kwargs, + void TestTunedOperator(const kwargs_t& kwargs, const bool verbose, const std::vector& shapevec_vectors, - const char *op_name, - const char *backward_op_name = COREOP_BWD_OP_NAME_VALUE_NONE) { + const char* op_name, + const char* backward_op_name = COREOP_BWD_OP_NAME_VALUE_NONE) { timing_.clear(); using namespace mxnet::op; tuned_timing_t timing; for (int x = 0; x < 1; ++x) { for (auto mode : {::mxnet::op::tune::kNeverOMP, ::mxnet::op::tune::kAuto, - ::mxnet::op::tune::kAlwaysOMP - }) { + ::mxnet::op::tune::kAlwaysOMP}) { if (verbose && !test::csv) { - std::cout << std::endl << ::mxnet::op::tune::TuningModeToString(mode) - << std::endl << std::flush; + std::cout << std::endl + << ::mxnet::op::tune::TuningModeToString(mode) << std::endl + << std::flush; } mxnet::op::OperatorTune::set_tuning_mode(mode); - const ShapesToPerfTimingMap shapes2perfmap = RunCoreOpTimingTest(false, - kwargs, - shapevec_vectors, - op_name, - backward_op_name); - for (const auto &item : shapes2perfmap) { - const shape_vect &shapes = item.first; - const test::perf::timing_map_t &tm = item.second; - timing_[shapes][mode] = tm; + const ShapesToPerfTimingMap shapes2perfmap = + RunCoreOpTimingTest(false, kwargs, shapevec_vectors, op_name, backward_op_name); + for (const auto& item : shapes2perfmap) { + const shape_vect& shapes = item.first; + const test::perf::timing_map_t& tm = item.second; + timing_[shapes][mode] = tm; } } } @@ -292,14 +290,14 @@ class TuningTester { * \return Success rate ratio (#success/#TOTAL) (0.0-1.0) */ float CalculateSuccessRate(std::vector directions = {}, - bool verbose = true) const { + bool verbose = true) const { size_t count = 0, success = 0; if (directions.empty()) { directions = {test::op::kForward, test::op::kBackward}; } for (const test::op::TimingDirection direction : directions) { typename test::tune::TuningTester::shape_vec_to_bool_map res_fwd = - CalculateModeSort(direction, verbose); + CalculateModeSort(direction, verbose); for (auto iter = res_fwd.begin(), e = res_fwd.end(); iter != e; ++iter) { ++count; if (iter->second.first) { @@ -319,16 +317,20 @@ class TuningTester { size_t calls_per_iteration(size_t calls_per_iterations) const { return calls_per_iteration_; } - void set_total_iterations(size_t iterations) { total_iterations_ = iterations; } - size_t total_iterations(size_t iterations) const { return total_iterations_; } + void set_total_iterations(size_t iterations) { + total_iterations_ = iterations; + } + size_t total_iterations(size_t iterations) const { + return total_iterations_; + } private: /*! \brief Number of iterations */ - size_t total_iterations_ = 10; + size_t total_iterations_ = 10; /*! \brief Calls per iteration */ - size_t calls_per_iteration_ = 50; + size_t calls_per_iteration_ = 50; /*! 
\brief Raw timing data */ - tuned_timing_t timing_; + tuned_timing_t timing_; }; } // namespace tune diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index 8e270834bbcc..9b495388955c 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -21,7 +21,7 @@ * \file test_util.h * \brief unit test performance analysis functions * \author Chris Olivier -*/ + */ #ifndef TEST_UTIL_H_ #define TEST_UTIL_H_ @@ -49,7 +49,7 @@ extern bool performance_run; extern bool csv; extern bool thread_safety_force_cpu; -template +template inline size_t shapeMemorySize(const mxnet::TShape& shape) { return shape.Size() * sizeof(DType); } @@ -62,11 +62,11 @@ class BlobMemory { inline ~BlobMemory() { Free(); } - void *Alloc(const size_t size) { + void* Alloc(const size_t size) { CHECK_GT(size, 0U); // You've probably made a mistake mxnet::Context context = isGPU_ ? mxnet::Context::GPU(0) : mxnet::Context{}; - Storage *storage = mxnet::Storage::Get(); - handle_ = storage->Alloc(size, context); + Storage* storage = mxnet::Storage::Get(); + handle_ = storage->Alloc(size, context); return handle_.dptr; } void Free() { @@ -79,17 +79,17 @@ class BlobMemory { } private: - const bool isGPU_; + const bool isGPU_; Storage::Handle handle_; }; class StandaloneBlob : public TBlob { public: inline StandaloneBlob(const mxnet::TShape& shape, const bool isGPU, const int dtype) - : TBlob(nullptr, shape, isGPU ? gpu::kDevMask : cpu::kDevMask, dtype) - , memory_(std::make_shared(isGPU)) { - MSHADOW_TYPE_SWITCH(dtype, DType, { - this->dptr_ = memory_->Alloc(shapeMemorySize(shape)); }); + : TBlob(nullptr, shape, isGPU ? gpu::kDevMask : cpu::kDevMask, dtype), + memory_(std::make_shared(isGPU)) { + MSHADOW_TYPE_SWITCH( + dtype, DType, { this->dptr_ = memory_->Alloc(shapeMemorySize(shape)); }); } inline ~StandaloneBlob() { this->dptr_ = nullptr; @@ -100,7 +100,7 @@ class StandaloneBlob : public TBlob { private: /*! \brief Locally allocated memory block for this blob */ - std::shared_ptr memory_; + std::shared_ptr memory_; }; /*! 
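Aside: shapeMemorySize<DType>() above is just "element count times element size". A standalone equivalent with std::vector standing in for mxnet::TShape:

#include <cstddef>
#include <iostream>
#include <vector>

// The byte footprint of a dense tensor is the product of its extents
// multiplied by the size of one element.
template <typename DType>
size_t shape_memory_size(const std::vector<size_t>& shape) {
  size_t elems = 1;
  for (size_t d : shape) elems *= d;
  return elems * sizeof(DType);
}

int main() {
  std::cout << shape_memory_size<float>({128, 3, 64, 64}) << " bytes\n";  // 6291456
}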
@@ -111,16 +111,14 @@ class StandaloneBlob : public TBlob { class CAccessAsCPU { public: CAccessAsCPU(const RunContext& run_ctx, const TBlob& src, bool copy_back_result = true) - : run_ctx_(run_ctx) - , src_(src) - , copy_back_result_(copy_back_result) { + : run_ctx_(run_ctx), src_(src), copy_back_result_(copy_back_result) { #if MXNET_USE_CUDA if (run_ctx.ctx.dev_type == Context::kCPU) { blob_ = src; } else { Context cpu_ctx, gpu_ctx = run_ctx.ctx; cpu_ctx.dev_type = Context::kCPU; - cpu_ctx.dev_id = 0; + cpu_ctx.dev_id = 0; NDArray on_cpu(src.shape_, cpu_ctx, false, src_.type_flag_); on_cpu.CheckAndAlloc(); blob_ = on_cpu.data(); @@ -140,7 +138,7 @@ class CAccessAsCPU { if (run_ctx_.ctx.dev_type == Context::kGPU) { Context cpu_ctx, gpu_ctx = run_ctx_.ctx; cpu_ctx.dev_type = Context::kCPU; - cpu_ctx.dev_id = 0; + cpu_ctx.dev_id = 0; run_ctx_.get_stream()->Wait(); mxnet::ndarray::Copy(blob_, &src_, gpu_ctx, cpu_ctx, run_ctx_); run_ctx_.get_stream()->Wait(); @@ -148,7 +146,7 @@ class CAccessAsCPU { } #endif } - inline const TBlob& operator ()() const { + inline const TBlob& operator()() const { return blob_; } @@ -168,16 +166,14 @@ class CAccessAsCPU { * \param cb Callback Function to call with CPU-data NDArray */ template -inline void AccessAsCPU(const NDArray &src, - const RunContext &run_ctx, - CallbackFunction cb) { +inline void AccessAsCPU(const NDArray& src, const RunContext& run_ctx, CallbackFunction cb) { #if MXNET_USE_CUDA if (src.ctx().dev_type == Context::kCPU) { cb(src); } else { Context cpu_ctx, gpu_ctx = src.ctx(); cpu_ctx.dev_type = Context::kCPU; - cpu_ctx.dev_id = 0; + cpu_ctx.dev_id = 0; NDArray on_cpu(src.shape(), cpu_ctx, false, src.dtype()); on_cpu.CheckAndAlloc(); TBlob tmp1 = on_cpu.data(); @@ -202,9 +198,7 @@ inline void AccessAsCPU(const NDArray &src, * \param cb Callback Function to call with CPU-data TBlob */ template -inline void AccessAsCPU(const TBlob& src, - const RunContext &run_ctx, - CallbackFunction cb) { +inline void AccessAsCPU(const TBlob& src, const RunContext& run_ctx, CallbackFunction cb) { #if MXNET_USE_CUDA if (run_ctx.ctx.dev_type == Context::kCPU) { cb(src); @@ -217,11 +211,11 @@ inline void AccessAsCPU(const TBlob& src, } constexpr const size_t MPRINT_PRECISION = 5; -template -inline void fill(const RunContext &run_ctx, const TBlob& _blob, const DType val) { +template +inline void fill(const RunContext& run_ctx, const TBlob& _blob, const DType val) { AccessAsCPU(_blob, run_ctx, [val](const TBlob& blob) { MSHADOW_TYPE_SWITCH(blob.type_flag_, DTypeX, { - DTypeX *p1 = blob.dptr(); + DTypeX* p1 = blob.dptr(); for (size_t i = 0, n = blob.Size(); i < n; ++i) { *p1++ = val; } @@ -229,16 +223,16 @@ inline void fill(const RunContext &run_ctx, const TBlob& _blob, const DType val) }); } -template -inline void try_fill(const RunContext &run_ctx, const TBlob *blob, const DType val) { +template +inline void try_fill(const RunContext& run_ctx, const TBlob* blob, const DType val) { if (blob) { fill(run_ctx, *blob, val); } } -template -inline void dump(Stream *os, const TBlob& blob, const char *suffix = "f") { - DType *p1 = blob.dptr(); +template +inline void dump(Stream* os, const TBlob& blob, const char* suffix = "f") { + DType* p1 = blob.dptr(); for (size_t i = 0, n = blob.Size(); i < n; ++i) { if (i) { *os << ", "; @@ -257,7 +251,6 @@ inline void dump(Stream *os, const TBlob& blob, const char *suffix = "f") { } } - /*! 
\brief Return reference to data at position indexes */ inline index_t getMult(const mxnet::TShape& shape, const index_t axis) { return axis < shape.ndim() ? shape[axis] : 1; @@ -279,18 +272,19 @@ inline index_t offset(const mxnet::TShape& shape, const std::vector& ind } /*! \brief Return reference to data at position indexes */ -template -inline const DType& data_at(const TBlob *blob, const std::vector& indices) { +template +inline const DType& data_at(const TBlob* blob, const std::vector& indices) { return blob->dptr()[offset(blob->shape_, indices)]; } /*! \brief Set data at position indexes */ -template -inline DType& data_ref(const TBlob *blob, const std::vector& indices) { +template +inline DType& data_ref(const TBlob* blob, const std::vector& indices) { return blob->dptr()[offset(blob->shape_, indices)]; } -inline std::string repeatedStr(const char *s, const signed int count, +inline std::string repeatedStr(const char* s, + const signed int count, const bool trailSpace = false) { if (count <= 0) { return std::string(); @@ -311,9 +305,11 @@ inline std::string repeatedStr(const char *s, const signed int count, } /*! \brief Pretty print a shape with optional label */ -template -inline StreamType& print_shape(StreamType *_os, const std::string& label, - const mxnet::TShape& shape, const bool add_endl = true) { +template +inline StreamType& print_shape(StreamType* _os, + const std::string& label, + const mxnet::TShape& shape, + const bool add_endl = true) { if (!label.empty()) { *_os << label << ": "; } @@ -334,21 +330,21 @@ inline StreamType& print_shape(StreamType *_os, const std::string& label, } /*! \brief Pretty print a 1D, 2D, or 3D blob */ -template +template inline StreamType& print_blob_(const RunContext& ctx, - StreamType *_os, - const TBlob &blob, + StreamType* _os, + const TBlob& blob, const bool doChannels = true, - const bool doBatches = true, - const bool add_endl = true) { + const bool doBatches = true, + const bool add_endl = true) { #if MXNET_USE_CUDA if (blob.dev_mask() == gpu::kDevMask) { - return print_blob_(ctx, _os, CAccessAsCPU(ctx, blob, false)(), doChannels, - doBatches, add_endl); + return print_blob_( + ctx, _os, CAccessAsCPU(ctx, blob, false)(), doChannels, doBatches, add_endl); } #endif // MXNET_USE_CUDA - StreamType &os = *_os; + StreamType& os = *_os; const size_t dim = static_cast(blob.ndim()); if (dim == 1) { @@ -372,9 +368,9 @@ inline StreamType& print_blob_(const RunContext& ctx, const size_t batchSize = blob.size(0); size_t channels = 1; - size_t depth = 1; - size_t height = 1; - size_t width = 1; + size_t depth = 1; + size_t height = 1; + size_t width = 1; if (dim > 1) { channels = blob.size(1); if (dim > 2) { @@ -382,7 +378,7 @@ inline StreamType& print_blob_(const RunContext& ctx, width = blob.size(2); } else if (dim == 4) { height = blob.size(2); - width = blob.size(3); + width = blob.size(3); } else { depth = blob.size(2); if (dim > 3) { @@ -434,8 +430,8 @@ inline StreamType& print_blob_(const RunContext& ctx, break; } os << repeatedStr("(", dd); - os << std::fixed << std::setw(7) << std::setprecision(MPRINT_PRECISION) - << std::right << val << " "; + os << std::fixed << std::setw(7) << std::setprecision(MPRINT_PRECISION) << std::right + << val << " "; os << repeatedStr(")", dd, true); } } @@ -447,7 +443,8 @@ inline StreamType& print_blob_(const RunContext& ctx, if (!doBatches) { break; } else { - os << " |" << std::flush;; + os << " |" << std::flush; + ; } } if (r < height - 1) { @@ -468,34 +465,38 @@ inline StreamType& print_blob_(const 
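Aside: offset() above flattens an N-dimensional index into a row-major position. The rule is Horner-style: multiply the running offset by each dimension's extent, then add that dimension's index. A self-contained version (names are ours):

#include <cstddef>
#include <iostream>
#include <vector>

// Row-major flattening: walk the indices left to right, scaling by each
// extent before adding the next coordinate.
size_t row_major_offset(const std::vector<size_t>& shape,
                        const std::vector<size_t>& idx) {
  size_t off = 0;
  for (size_t d = 0; d < idx.size(); ++d) {
    off = off * shape[d] + idx[d];
  }
  return off;
}

int main() {
  // Element (1, 2, 3) of a 2 x 4 x 5 tensor -> 1*20 + 2*5 + 3 = 33.
  std::cout << row_major_offset({2, 4, 5}, {1, 2, 3}) << "\n";
}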
RunContext& ctx, return os; } -template +template inline StreamType& print(const RunContext& ctx, - StreamType *_os, - const TBlob &blob, + StreamType* _os, + const TBlob& blob, const bool doChannels = true, - const bool doBatches = true, - const bool add_endl = true) { + const bool doBatches = true, + const bool add_endl = true) { MSHADOW_TYPE_SWITCH(blob.type_flag_, DType, { print_blob_(ctx, _os, blob, doChannels, doBatches, add_endl); }); return *_os; } -template -inline StreamType& print(const RunContext& ctx, StreamType *_os, const std::string &label, - const TBlob &blob, +template +inline StreamType& print(const RunContext& ctx, + StreamType* _os, + const std::string& label, + const TBlob& blob, const bool doChannels = true, - bool doBatches = true, - const bool add_endl = true) { + bool doBatches = true, + const bool add_endl = true) { if (!label.empty()) { *_os << label << ": "; } return print(ctx, _os, blob, doChannels, doBatches, add_endl); } -template -inline StreamType& print(const RunContext& ctx, StreamType *_os, - const std::string& label, const NDArray& arr) { +template +inline StreamType& print(const RunContext& ctx, + StreamType* _os, + const std::string& label, + const NDArray& arr) { if (!label.empty()) { *_os << label << ": "; } @@ -505,7 +506,7 @@ inline StreamType& print(const RunContext& ctx, StreamType *_os, const mxnet::TShape& shape = arr.shape(); print_shape(_os, "[row_sparse] main shape", shape, false); const mxnet::TShape& storage_shape = arr.storage_shape(); - const bool is_one_row = storage_shape[0] < 2; + const bool is_one_row = storage_shape[0] < 2; print_shape(_os, "storage shape", storage_shape, false); print(ctx, _os, arr.data(), true, true, !is_one_row); @@ -520,7 +521,7 @@ inline StreamType& print(const RunContext& ctx, StreamType *_os, const mxnet::TShape& shape = arr.shape(); print_shape(_os, "[CSR] main shape", shape, false); const mxnet::TShape& storage_shape = arr.storage_shape(); - const bool is_one_row = storage_shape[0] < 2; + const bool is_one_row = storage_shape[0] < 2; print_shape(_os, "storage shape", storage_shape, false); print(ctx, _os, arr.data(), true, true, !is_one_row); @@ -539,7 +540,7 @@ inline StreamType& print(const RunContext& ctx, StreamType *_os, case kDefaultStorage: { // data const mxnet::TShape& shape = arr.shape(); - const bool is_one_row = shape[0] < 2; + const bool is_one_row = shape[0] < 2; print_shape(_os, "[dense] main shape", shape, !is_one_row); print(ctx, _os, arr.data(), true, true, !is_one_row) << std::endl; break; @@ -575,26 +576,30 @@ inline void print(const RunContext& ctx, } } -inline std::string demangle(const char *name) { +inline std::string demangle(const char* name) { #if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) int status = -4; // some arbitrary value to eliminate the compiler warning - std::unique_ptr res { - abi::__cxa_demangle(name, nullptr, nullptr, &status), - &std::free - }; + std::unique_ptr res{abi::__cxa_demangle(name, nullptr, nullptr, &status), + &std::free}; return status ? 
name : res.get(); #else return name; #endif } -template -inline std::string type_name() { return demangle(typeid(T).name()); } +template +inline std::string type_name() { + return demangle(typeid(T).name()); +} -#define PRINT_NDARRAYS(__ctx$, __var) test::print(__ctx$, __FUNCTION__, #__var, __var) -#define PRINT_OP_AND_ARRAYS(__ctx$, __op, __var) test::print(__ctx$, __FUNCTION__, \ - static_cast(&(std::stringstream() << #__var << \ - "<" << type_name<__op>() << ">"))->str(), __var) +#define PRINT_NDARRAYS(__ctx$, __var) test::print(__ctx$, __FUNCTION__, #__var, __var) +#define PRINT_OP_AND_ARRAYS(__ctx$, __op, __var) \ + test::print(__ctx$, \ + __FUNCTION__, \ + static_cast( \ + &(std::stringstream() << #__var << "<" << type_name<__op>() << ">")) \ + ->str(), \ + __var) #define PRINT_OP2_AND_ARRAYS(__ctx$, __op1, __op2, __var) test::print(__ctx$, __FUNCTION__, \ static_cast(&(std::stringstream() << #__var << \ "<" << type_name<__op1>().name()) << ", " \ @@ -606,18 +611,18 @@ inline std::string type_name() { return demangle(typeid(T).name()); } * 2D: batch item -> channel -> row -> col * 3D: batch item -> channel -> col */ -template +template static inline void patternFill(const RunContext& run_ctx, - const TBlob *_blob, + const TBlob* _blob, GetNextData getNextData) { AccessAsCPU(*_blob, run_ctx, [getNextData](const TBlob& blob) { const size_t dim = static_cast(blob.ndim()); CHECK_LE(dim, 5U) << "Will need to handle above 3 dimensions (another for loop)"; - const size_t num = blob.size(0); - const size_t channels = dim > 1 ? blob.size(1) : 1; - const size_t depth = dim > 2 ? blob.size(2) : 1; - const size_t height = dim > 3 ? blob.size(3) : 1; - const size_t width = dim > 4 ? blob.size(4) : 1; + const size_t num = blob.size(0); + const size_t channels = dim > 1 ? blob.size(1) : 1; + const size_t depth = dim > 2 ? blob.size(2) : 1; + const size_t height = dim > 3 ? blob.size(3) : 1; + const size_t width = dim > 4 ? 
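Aside: demangle()/type_name() above rely on abi::__cxa_demangle, which is specific to the GCC/Clang C++ runtimes (hence the __GLIBCXX__/_LIBCPP_VERSION guard). A minimal usage sketch (the wrapper name is ours):

#include <cxxabi.h>

#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <typeinfo>
#include <vector>

// typeid(T).name() yields a mangled string on GCC/Clang;
// abi::__cxa_demangle recovers the source-level spelling.
template <typename T>
std::string pretty_type_name() {
  int status = -4;
  std::unique_ptr<char, void (*)(void*)> res{
      abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status), &std::free};
  return status == 0 ? std::string(res.get()) : std::string(typeid(T).name());
}

int main() {
  std::cout << pretty_type_name<std::vector<float>>() << "\n";
}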
blob.size(4) : 1; const size_t numberOfIndexes = blob.shape_.Size(); for (size_t n = 0; n < num; ++n) { if (dim > 1) { @@ -632,8 +637,8 @@ static inline void patternFill(const RunContext& run_ctx, const size_t idx = test::offset(blob.shape_, {n, ch, d, row, col}); CHECK_LT(idx, numberOfIndexes); MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { - ThisDataType &f = blob.dptr()[idx]; - f = getNextData(); + ThisDataType& f = blob.dptr()[idx]; + f = getNextData(); }); } else { CHECK(dim <= 5) << "Unimplemented dimension: " << dim; @@ -643,8 +648,8 @@ static inline void patternFill(const RunContext& run_ctx, const size_t idx = test::offset(blob.shape_, {n, ch, d, row}); CHECK_LT(idx, numberOfIndexes); MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { - ThisDataType &f = blob.dptr()[idx]; - f = getNextData(); + ThisDataType& f = blob.dptr()[idx]; + f = getNextData(); }); } } @@ -652,8 +657,8 @@ static inline void patternFill(const RunContext& run_ctx, const size_t idx = test::offset(blob.shape_, {n, ch, d}); CHECK_LT(idx, numberOfIndexes); MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { - ThisDataType &f = blob.dptr()[idx]; - f = getNextData(); + ThisDataType& f = blob.dptr()[idx]; + f = getNextData(); }); } } @@ -661,8 +666,8 @@ static inline void patternFill(const RunContext& run_ctx, const size_t idx = test::offset(blob.shape_, {n, ch}); CHECK_LT(idx, numberOfIndexes); MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { - ThisDataType &f = blob.dptr()[idx]; - f = getNextData(); + ThisDataType& f = blob.dptr()[idx]; + f = getNextData(); }); } } @@ -670,8 +675,8 @@ static inline void patternFill(const RunContext& run_ctx, const size_t idx = test::offset(blob.shape_, {n}); CHECK_LT(idx, numberOfIndexes); MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, { - ThisDataType &f = blob.dptr()[idx]; - f = getNextData(); + ThisDataType& f = blob.dptr()[idx]; + f = getNextData(); }); } } @@ -679,12 +684,10 @@ static inline void patternFill(const RunContext& run_ctx, } /*! \brief Return a random number within a given range (inclusive) */ -template +template inline ScalarType rangedRand(const ScalarType min, const ScalarType max) { - uint64_t num_bins = static_cast(max + 1), - num_rand = static_cast(RAND_MAX), - bin_size = num_rand / num_bins, - defect = num_rand % num_bins; + uint64_t num_bins = static_cast(max + 1), num_rand = static_cast(RAND_MAX), + bin_size = num_rand / num_bins, defect = num_rand % num_bins; ScalarType x; do { x = std::rand(); @@ -700,7 +703,7 @@ inline ScalarType rangedRand(const ScalarType min, const ScalarType max) { * \param s2 Second shape * \return true if s1 is less than s2 */ -inline bool operator < (const mxnet::TShape &s1, const mxnet::TShape &s2) { +inline bool operator<(const mxnet::TShape& s1, const mxnet::TShape& s2) { if (s1.Size() == s2.Size()) { if (s1.ndim() == s2.ndim()) { for (size_t i = 0, n = s1.ndim(); i < n; ++i) { @@ -723,8 +726,7 @@ inline bool operator < (const mxnet::TShape &s1, const mxnet::TShape &s2) { * \param v2 Second vector of shapes * \return true if v1 is less than v2 */ -inline bool operator < (const std::vector& v1, - const std::vector& v2) { +inline bool operator<(const std::vector& v1, const std::vector& v2) { if (v1.size() == v2.size()) { for (size_t i = 0, n = v1.size(); i < n; ++i) { if (v1[i] == v2[i]) { @@ -774,25 +776,23 @@ inline std::string pretty_num(uint64_t val) { } /*! 
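Aside: rangedRand() above avoids the classic modulo bias: taking rand() % n makes low values slightly more likely whenever the generator's range is not a multiple of n, so draws that fall in the uneven tail are rejected and retried. The same idea with std::mt19937 (function name is ours):

#include <cstdint>
#include <iostream>
#include <random>

// Rejection sampling: discard raw draws from the tail that would make some
// buckets one draw "fatter" than others, instead of taking a biased modulo.
uint32_t uniform_in_range(std::mt19937* gen, uint32_t min, uint32_t max) {
  const uint64_t num_bins = static_cast<uint64_t>(max - min) + 1;
  const uint64_t num_rand = static_cast<uint64_t>(std::mt19937::max()) + 1;
  const uint64_t bin_size = num_rand / num_bins;
  const uint64_t limit = bin_size * num_bins;  // draws >= limit are rejected
  uint64_t x;
  do {
    x = (*gen)();
  } while (x >= limit);
  return min + static_cast<uint32_t>(x / bin_size);
}

int main() {
  std::mt19937 gen(1);
  for (int i = 0; i < 5; ++i)
    std::cout << uniform_in_range(&gen, 1, 6) << " ";  // fair die rolls
  std::cout << "\n";
}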
\brief Change a value during the scope of this declaration */ -template +template struct ScopeSet { - inline ScopeSet(T *var, const T tempValue) - : var_(*var) - , saveValue_(var) { + inline ScopeSet(T* var, const T tempValue) : var_(*var), saveValue_(var) { *var = tempValue; } inline ~ScopeSet() { var_ = saveValue_; } T& var_; - T saveValue_; + T saveValue_; }; - -static void AssertEqual(const std::vector &in_arrs, - const std::vector &out_arrs, - float rtol = 1e-5, float atol = 1e-8, - bool test_first_only = false) { +static void AssertEqual(const std::vector& in_arrs, + const std::vector& out_arrs, + float rtol = 1e-5, + float atol = 1e-8, + bool test_first_only = false) { for (size_t j = 0; j < in_arrs.size(); ++j) { // When test_all is fir if (test_first_only && j == 1) { @@ -811,12 +811,10 @@ static void AssertEqual(const std::vector &in_arrs, tmp2 = tmp2.Reorder2Default(); #endif EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size()); - TBlob blob1 = tmp1.data(); - TBlob blob2 = tmp2.data(); - mshadow::default_real_t *d1 = - static_cast(blob1.dptr_); - mshadow::default_real_t *d2 = - static_cast(blob2.dptr_); + TBlob blob1 = tmp1.data(); + TBlob blob2 = tmp2.data(); + mshadow::default_real_t* d1 = static_cast(blob1.dptr_); + mshadow::default_real_t* d2 = static_cast(blob2.dptr_); for (int i = 0; i < tmp1.shape().Size(); i++) { float abs_err = fabs((d1[i]) - (d2[i])); ASSERT_LE(abs_err, (atol + rtol * fabs(d2[i]))) @@ -825,8 +823,6 @@ static void AssertEqual(const std::vector &in_arrs, } } - - } // namespace test } // namespace mxnet @@ -836,7 +832,7 @@ inline void usleep(__int64 usec) { LARGE_INTEGER ft; // Convert to 100 nanosecond interval, negative value indicates relative time - ft.QuadPart = -(10*usec); + ft.QuadPart = -(10 * usec); timer = CreateWaitableTimer(NULL, TRUE, NULL); SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0); diff --git a/tests/cpp/kvstore/gpu_topology_test.cc b/tests/cpp/kvstore/gpu_topology_test.cc index d26894c21ea7..49c32502cc44 100644 --- a/tests/cpp/kvstore/gpu_topology_test.cc +++ b/tests/cpp/kvstore/gpu_topology_test.cc @@ -21,7 +21,7 @@ * Copyright (c) 2018 by Contributors * \file gpu_topology_test.cc * \brief gpu topology tests -*/ + */ #if MXNET_USE_CUDA @@ -33,17 +33,17 @@ void GenerateMatrix(std::vector* W, int num_gpus, std::mt19937* gen) { std::uniform_real_distribution<> dis(0., 1.); for (int row = 0; row < num_gpus; ++row) { - for (int col = row+1; col < num_gpus; ++col) { + for (int col = row + 1; col < num_gpus; ++col) { double sample = dis(*gen); if (sample < 0.33) { - (*W)[row*num_gpus+col] = 1.; - (*W)[col*num_gpus+row] = 1.; + (*W)[row * num_gpus + col] = 1.; + (*W)[col * num_gpus + row] = 1.; } else if (sample < 0.66f) { - (*W)[row*num_gpus+col] = 2.; - (*W)[col*num_gpus+row] = 2.; + (*W)[row * num_gpus + col] = 2.; + (*W)[col * num_gpus + row] = 2.; } else { - (*W)[row*num_gpus+col] = 3.; - (*W)[col*num_gpus+row] = 3.; + (*W)[row * num_gpus + col] = 3.; + (*W)[col * num_gpus + row] = 3.; } } } @@ -53,7 +53,7 @@ bool IsSatisfactory(const std::vector& W, int num_gpus, int depth) { for (int row = 0; row < num_gpus; ++row) { int out_edges = 0; for (int col = 0; col < num_gpus; ++col) { - if (W[row*num_gpus+col] > 0.f) + if (W[row * num_gpus + col] > 0.f) out_edges++; } if (out_edges < depth) @@ -63,11 +63,10 @@ bool IsSatisfactory(const std::vector& W, int num_gpus, int depth) { } // Generates random link topology matrix using random number generator -void TestComputeTreesRandomized(int num_gpus, float alpha, int backtrack, - std::mt19937* 
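Aside: AssertEqual above compares element-wise with the usual mixed tolerance, |a - b| <= atol + rtol * |b|: an absolute floor for values near zero plus a term that scales with the reference magnitude. A compact sketch of the predicate:

#include <cassert>
#include <cmath>

// Mixed absolute/relative comparison, matching the bound AssertEqual checks.
bool close(float a, float b, float rtol = 1e-5f, float atol = 1e-8f) {
  return std::fabs(a - b) <= atol + rtol * std::fabs(b);
}

int main() {
  assert(close(1000.0f, 1000.005f));  // within the relative term
  assert(!close(0.0f, 0.001f));       // near zero only atol applies
}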
gen) { +void TestComputeTreesRandomized(int num_gpus, float alpha, int backtrack, std::mt19937* gen) { std::uniform_real_distribution<> dis(0.f, 1.f); bool satisfied = false; - std::vector W(num_gpus*num_gpus, 0.f); + std::vector W(num_gpus * num_gpus, 0.f); int depth = mxnet::kvstore::ComputeDepth(num_gpus); GenerateMatrix(&W, num_gpus, gen); satisfied = IsSatisfactory(W, num_gpus, depth); @@ -82,7 +81,7 @@ void TestComputeTreesRandomized(int num_gpus, float alpha, int backtrack, mxnet::kvstore::ComputeTrees(W, num_gpus, alpha, backtrack, &topo, &scan); unsigned correct_topo_size = (1 << (depth + 1)) - 1; - unsigned correct_scan_size = depth+2; + unsigned correct_scan_size = depth + 2; ASSERT_EQ(topo.size(), static_cast(num_gpus)); for (unsigned i = 0; i < topo.size(); ++i) { ASSERT_EQ(correct_topo_size, topo[i].size()); @@ -92,26 +91,24 @@ void TestComputeTreesRandomized(int num_gpus, float alpha, int backtrack, // Permutes matrix W using permutation vector P and stores output in matrix A // Assumption: W is square and symmetric -void PermuteMatrix(const std::vector& W, - const std::vector& P, - std::vector* A) { +void PermuteMatrix(const std::vector& W, const std::vector& P, std::vector* A) { int nrows = P.size(); - std::vector temp(nrows*nrows, 0); + std::vector temp(nrows * nrows, 0); int count = 0; - for (int row=0; row < nrows; ++row) { - for (int col=0; col < nrows; ++col) { + for (int row = 0; row < nrows; ++row) { + for (int col = 0; col < nrows; ++col) { int row_start = P[row]; - temp[count] = W[row_start*nrows+col]; + temp[count] = W[row_start * nrows + col]; count++; } } count = 0; - for (int row=0; row < nrows; ++row) { - for (int col=0; col < nrows; ++col) { + for (int row = 0; row < nrows; ++row) { + for (int col = 0; col < nrows; ++col) { int col_index = P[col]; - (*A)[count] = temp[row*nrows+col_index]; + (*A)[count] = temp[row * nrows + col_index]; count++; } } @@ -121,7 +118,7 @@ TEST(GpuTopology, TestFormTopology) { std::vector state0 = {3, 2, 1, 5, 0, 0, 4, 6}; std::vector topo0; std::vector scan0; - std::vector correct0 = {3, 3, 0, 3, 1, 0, 4, 3, 2, 1, 5, 0, 0, 4, 6}; + std::vector correct0 = {3, 3, 0, 3, 1, 0, 4, 3, 2, 1, 5, 0, 0, 4, 6}; std::vector correct_scan0 = {0, 1, 3, 7, 15}; mxnet::kvstore::FormTopology(state0, &topo0, &scan0, 3); ASSERT_EQ(topo0.size(), correct0.size()); @@ -134,7 +131,7 @@ TEST(GpuTopology, TestFormTopology) { std::vector state1 = {3, 2, 0, 4, 1, 1, 5, 6}; std::vector topo1; std::vector scan1; - std::vector correct1 = {3, 3, 1, 3, 0, 1, 5, 3, 2, 0, 4, 1, 1, 5, 6}; + std::vector correct1 = {3, 3, 1, 3, 0, 1, 5, 3, 2, 0, 4, 1, 1, 5, 6}; std::vector correct_scan1 = {0, 1, 3, 7, 15}; mxnet::kvstore::FormTopology(state1, &topo1, &scan1, 3); ASSERT_EQ(topo1.size(), correct1.size()); @@ -146,13 +143,8 @@ TEST(GpuTopology, TestFormTopology) { } TEST(GpuTopology, TestComputeTreeWeight) { - std::vector W = {0, 2, 2, 3, 3, 0, 0, - 2, 0, 3, 2, 0, 3, 0, - 2, 3, 0, 3, 0, 0, 2, - 3, 2, 3, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 2, 2, - 0, 3, 0, 0, 2, 0, 3, - 0, 0, 2, 0, 2, 3, 0}; + std::vector W = {0, 2, 2, 3, 3, 0, 0, 2, 0, 3, 2, 0, 3, 0, 2, 3, 0, 3, 0, 0, 2, 3, 2, 3, 0, + 0, 0, 0, 3, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 2, 0, 3, 0, 0, 2, 0, 2, 3, 0}; std::vector state0 = {3, 2, 1, 5, 0, 0, 4, 6}; ASSERT_EQ(mxnet::kvstore::ComputeTreeWeight(W, state0, 7, 3, false), 16); @@ -180,7 +172,7 @@ TEST(GpuTopology, TestPostprocess) { for (unsigned i = 0; i < correct2.size(); ++i) ASSERT_EQ(result2[i], correct2[i]); - std::vector result3 = {10, 10, 0, 0, 0, 0, 0, 1, 2, 3, 6, 4, 
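Aside: PermuteMatrix above relabels the vertices of a symmetric weight matrix so that A[i][j] == W[P[i]][P[j]]; its two passes (rows via a temporary, then columns) compute exactly that. A direct one-pass sketch:

#include <iostream>
#include <vector>

// Relabel vertices of a symmetric matrix: vertex i takes the role of P[i].
void permute(const std::vector<int>& W, const std::vector<int>& P,
             std::vector<int>* A) {
  const int n = static_cast<int>(P.size());
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      (*A)[i * n + j] = W[P[i] * n + P[j]];
}

int main() {
  const std::vector<int> W = {0, 1, 2,
                              1, 0, 3,
                              2, 3, 0};
  const std::vector<int> P = {2, 0, 1};
  std::vector<int> A(9, 0);
  permute(W, P, &A);
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) std::cout << A[i * 3 + j] << " ";
    std::cout << "\n";  // stays symmetric with a zero diagonal
  }
}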
7, 5, 8, 9}; + std::vector result3 = {10, 10, 0, 0, 0, 0, 0, 1, 2, 3, 6, 4, 7, 5, 8, 9}; std::vector correct3 = {10, 10, 10, 10, 0, 0, 0, 1, 2, 3, 6, 4, 7, 5, 8, 9}; mxnet::kvstore::Postprocess(&result3, 11, 4); for (unsigned i = 0; i < correct3.size(); ++i) @@ -198,13 +190,8 @@ TEST(GpuTopology, TestDepth) { } TEST(GpuTopology, TestIsValid) { - std::vector W = {0, 2, 2, 3, 3, 0, 0, - 2, 0, 3, 2, 0, 3, 0, - 2, 3, 0, 3, 0, 0, 2, - 3, 2, 3, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 2, 2, - 0, 3, 0, 0, 2, 0, 3, - 0, 0, 2, 0, 2, 3, 0}; + std::vector W = {0, 2, 2, 3, 3, 0, 0, 2, 0, 3, 2, 0, 3, 0, 2, 3, 0, 3, 0, 0, 2, 3, 2, 3, 0, + 0, 0, 0, 3, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 2, 0, 3, 0, 0, 2, 0, 2, 3, 0}; std::vector state0 = {3, 2, 1, 5, 0, 0, 4, 6}; ASSERT_EQ(mxnet::kvstore::IsValid(W, state0, 7, 7, 3), true); @@ -260,7 +247,7 @@ TEST(GpuTopology, TestEwisemult) { std::vector x(8, 1); std::vector y(8, 0); std::iota(y.begin(), y.end(), 0); - int alpha = 5; + int alpha = 5; std::vector correct_y = {0, 5, 10, 15, 20, 25, 30, 35}; mxnet::kvstore::ewisemult(x, alpha, &y); @@ -271,14 +258,9 @@ TEST(GpuTopology, TestEwisemult) { // FindBestMoveTest TEST(GpuTopology, TestFindBestMove) { - std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 2, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; + std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 2, 3, 1, 3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; std::vector P(8, 0); std::iota(P.begin(), P.end(), 1); std::unordered_set used; @@ -311,32 +293,32 @@ TEST(GpuTopology, TestGetRoot) { // Test when roots are non-empty, and matches color std::unordered_set roots1 = {0, 2, 4, 6}; - std::vector color1 = {0, 1, 2, 3}; + std::vector color1 = {0, 1, 2, 3}; for (unsigned i = 0; i < color1.size(); ++i) { - int root1 = mxnet::kvstore::GetRoot(P, color1[i], roots1); - int correct_root1 = 2*i; + int root1 = mxnet::kvstore::GetRoot(P, color1[i], roots1); + int correct_root1 = 2 * i; ASSERT_EQ(root1, correct_root1); } // Test when roots is empty std::unordered_set roots2; - int color2 = 0; + int color2 = 0; int correct_root2 = -1; - int root2 = mxnet::kvstore::GetRoot(P, color2, roots2); + int root2 = mxnet::kvstore::GetRoot(P, color2, roots2); ASSERT_EQ(root2, correct_root2); // Test when roots is non-empty, but no root matches color std::unordered_set roots3 = {0}; - int color3 = 1; - int correct_root3 = -1; - int root3 = mxnet::kvstore::GetRoot(P, color3, roots3); + int color3 = 1; + int correct_root3 = -1; + int root3 = mxnet::kvstore::GetRoot(P, color3, roots3); ASSERT_EQ(root3, correct_root3); - std::vector P2 = {0, 1, 1, 0, 2, 3, 3, 2}; + std::vector P2 = {0, 1, 1, 0, 2, 3, 3, 2}; std::unordered_set roots4 = roots1; - int color4 = 0; - int correct_root4 = 0; - int root4 = mxnet::kvstore::GetRoot(P, color4, roots4); + int color4 = 0; + int correct_root4 = 0; + int root4 = mxnet::kvstore::GetRoot(P, color4, roots4); ASSERT_EQ(root4, correct_root4); } @@ -345,37 +327,32 @@ TEST(GpuTopology, TestGetChild) { std::vector P = {0, 0, 1, 2, 2, 2, 3, 3}; // Test when color is not found - int color1 = 4; - int parent1 = 4; + int color1 = 4; + int parent1 = 4; int correct_child1 = -1; - int child1 = mxnet::kvstore::GetChild(P, color1, parent1); + int child1 = mxnet::kvstore::GetChild(P, color1, parent1); ASSERT_EQ(child1, correct_child1); // Test when color is found, 
but is equal to parent - int color2 = 1; - int parent2 = 2; + int color2 = 1; + int parent2 = 2; int correct_child2 = -1; - int child2 = mxnet::kvstore::GetChild(P, color2, parent2); + int child2 = mxnet::kvstore::GetChild(P, color2, parent2); ASSERT_EQ(child2, correct_child2); // Test when color is found and not equal to parent - int color3 = 3; - int parent3 = 6; + int color3 = 3; + int parent3 = 6; int correct_child3 = 7; - int child3 = mxnet::kvstore::GetChild(P, color3, parent3); + int child3 = mxnet::kvstore::GetChild(P, color3, parent3); ASSERT_EQ(child3, correct_child3); } // FindBestEdgeTest TEST(GpuTopology, TestFindBestEdge) { - std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 2, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; + std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 2, 3, 1, 3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; std::vector P(8, 0); std::unordered_set used; @@ -384,7 +361,7 @@ TEST(GpuTopology, TestFindBestEdge) { std::vector b1; int g1; std::vector correct_b1 = {0, 2}; - int correct_g1 = 3; + int correct_g1 = 3; mxnet::kvstore::FindBestEdge(W, P, parent1, dest1, &b1, &g1); ASSERT_EQ(b1.size(), correct_b1.size()); for (unsigned i = 0; i < b1.size(); ++i) @@ -397,7 +374,7 @@ TEST(GpuTopology, TestFindBestEdge) { std::vector b2; int g2; std::vector correct_b2 = {-1}; - int correct_g2 = 0; + int correct_g2 = 0; mxnet::kvstore::FindBestEdge(W, P, parent2, dest2, &b2, &g2); ASSERT_EQ(b2.size(), correct_b2.size()); for (unsigned i = 0; i < b2.size(); ++i) @@ -407,14 +384,9 @@ TEST(GpuTopology, TestFindBestEdge) { // KLGenerateBinaryTreeTest TEST(GpuTopology, TestKLGenerateBinaryTree1) { - std::vector W = {0, 2, 3, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 3, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; + std::vector W = {0, 2, 3, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 3, 3, 1, 3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; std::vector P = {0, 1, 1, 0, 2, 3, 3, 2}; std::vector> cluster_pairs; cluster_pairs.push_back(std::pair(0, -2)); @@ -422,11 +394,10 @@ TEST(GpuTopology, TestKLGenerateBinaryTree1) { cluster_pairs.push_back(std::pair(2, -2)); cluster_pairs.push_back(std::pair(3, -2)); std::unordered_set roots = {0, 2, 4, 6}; - std::vector topo = {0, 2, 4, 6}; + std::vector topo = {0, 2, 4, 6}; std::vector scan(2, 0); std::mt19937 gen(1); - mxnet::kvstore::KLGenerateBinaryTree(W, P, &cluster_pairs, &roots, &topo, - &scan, &gen); + mxnet::kvstore::KLGenerateBinaryTree(W, P, &cluster_pairs, &roots, &topo, &scan, &gen); std::vector correct_topo = {0, 2, 4, 6, 0, 3, 2, 1, 4, 7, 6, 5}; std::vector correct_scan = {0, 0, 4}; ASSERT_EQ(topo.size(), correct_topo.size()); @@ -438,14 +409,9 @@ TEST(GpuTopology, TestKLGenerateBinaryTree1) { } TEST(GpuTopology, TestKLGenerateBinaryTree2) { - std::vector W = {0, 2, 3, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 3, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; + std::vector W = {0, 2, 3, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 3, 3, 1, 
3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; std::vector P = {0, 1, 1, 0, 2, 3, 3, 2}; std::vector> cluster_pairs; cluster_pairs.push_back(std::pair(0, -2)); @@ -453,11 +419,10 @@ TEST(GpuTopology, TestKLGenerateBinaryTree2) { cluster_pairs.push_back(std::pair(2, -2)); cluster_pairs.push_back(std::pair(3, -2)); std::unordered_set roots = {0, 2, 4, 6}; - std::vector topo = {0, 6, 4, 2}; + std::vector topo = {0, 6, 4, 2}; std::vector scan(2, 0); std::mt19937 gen(1); - mxnet::kvstore::KLGenerateBinaryTree(W, P, &cluster_pairs, &roots, &topo, - &scan, &gen); + mxnet::kvstore::KLGenerateBinaryTree(W, P, &cluster_pairs, &roots, &topo, &scan, &gen); std::vector correct_topo = {0, 6, 4, 2, 0, 3, 6, 5, 4, 7, 2, 1}; std::vector correct_scan = {0, 0, 4}; ASSERT_EQ(topo.size(), correct_topo.size()); @@ -470,13 +435,11 @@ TEST(GpuTopology, TestKLGenerateBinaryTree2) { // UpdateWeightTest TEST(GpuTopology, TestUpdateWeight) { - std::vector W = {0.f, 1.f, - 1.f, 0.f}; - std::vector topo = {1, 1, 0}; - int num_gpus = 2; - float alpha = 0.7; - std::vector correct_W = {0.f, 0.7f, - 0.7f, 0.f}; + std::vector W = {0.f, 1.f, 1.f, 0.f}; + std::vector topo = {1, 1, 0}; + int num_gpus = 2; + float alpha = 0.7; + std::vector correct_W = {0.f, 0.7f, 0.7f, 0.f}; mxnet::kvstore::UpdateWeight(&W, topo, num_gpus, alpha); ASSERT_EQ(W.size(), correct_W.size()); for (unsigned i = 0; i < W.size(); ++i) { @@ -486,25 +449,19 @@ TEST(GpuTopology, TestUpdateWeight) { // ComputeTreesFromRoot TEST(GpuTopology, TestComputeTreesFromRoot1) { - std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 2, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; - int num_gpus = 8; - int root = 0; - float alpha = 0.7; - bool backtrack = true; + std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 2, 3, 1, 3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; + int num_gpus = 8; + int root = 0; + float alpha = 0.7; + bool backtrack = true; unsigned correct_topo_size = 15; unsigned correct_scan_size = 5; std::vector topo; std::vector scan; - mxnet::kvstore::ComputeTreesFromRoot(&W, num_gpus, root, alpha, backtrack, - &topo, &scan); + mxnet::kvstore::ComputeTreesFromRoot(&W, num_gpus, root, alpha, backtrack, &topo, &scan); ASSERT_EQ(topo.size(), correct_topo_size); ASSERT_EQ(scan.size(), correct_scan_size); @@ -513,11 +470,8 @@ TEST(GpuTopology, TestComputeTreesFromRoot1) { // IsConnected // Test on graph that is "disconnected" by NVLink TEST(GpuTopology, TestIsConnected1) { - std::vector W = {0, 0, 2, 0, - 0, 0, 0, 2, - 2, 0, 0, 0, - 0, 2, 0, 0}; - int num_gpus = 4; + std::vector W = {0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0}; + int num_gpus = 4; bool connected = mxnet::kvstore::IsConnected(W, num_gpus); @@ -528,11 +482,8 @@ TEST(GpuTopology, TestIsConnected1) { // IsConnected // Test on graph that is "disconnected" by NVLink TEST(GpuTopology, TestIsConnected2) { - std::vector W = {1, 1, 2, 1, - 1, 1, 1, 2, - 2, 1, 1, 1, - 1, 2, 1, 1}; - int num_gpus = 4; + std::vector W = {1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1}; + int num_gpus = 4; bool connected = mxnet::kvstore::IsConnected(W, num_gpus); @@ -543,11 +494,8 @@ TEST(GpuTopology, TestIsConnected2) { // IsConnected // Test on graph that is "disconnected" by NVLink TEST(GpuTopology, TestIsConnected3) { - std::vector W = {1, 1, 2, 2, - 1, 1, 1, 2, - 
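Aside: the IsConnected tests above assert reachability over the link matrix. We have not traced mxnet::kvstore::IsConnected itself, so the edge criterion below (any entry above zero) is our assumption; the property being checked is plain graph connectivity, for which a BFS sketch suffices:

#include <iostream>
#include <queue>
#include <vector>

// Treating W[u][v] > 0 as an edge, every GPU must be reachable from GPU 0.
bool is_connected(const std::vector<float>& W, int n) {
  std::vector<bool> seen(n, false);
  std::queue<int> q;
  q.push(0);
  seen[0] = true;
  int visited = 0;
  while (!q.empty()) {
    const int u = q.front();
    q.pop();
    ++visited;
    for (int v = 0; v < n; ++v) {
      if (W[u * n + v] > 0.f && !seen[v]) {
        seen[v] = true;
        q.push(v);
      }
    }
  }
  return visited == n;
}

int main() {
  // Two disjoint linked pairs, as in the first 4-GPU matrix above.
  const std::vector<float> W = {0, 0, 2, 0,
                                0, 0, 0, 2,
                                2, 0, 0, 0,
                                0, 2, 0, 0};
  std::cout << (is_connected(W, 4) ? "connected" : "disconnected") << "\n";
}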
2, 1, 1, 1, - 2, 2, 1, 1}; - int num_gpus = 4; + std::vector W = {1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1}; + int num_gpus = 4; bool connected = mxnet::kvstore::IsConnected(W, num_gpus); @@ -558,7 +506,7 @@ TEST(GpuTopology, TestIsConnected3) { // ComputeTreesTest with backtracking TEST(GpuTopology, TestComputeTrees1) { std::mt19937 gen(1); - float alpha = 0.7; + float alpha = 0.7; bool backtrack = true; for (int num_gpus = 2; num_gpus <= 8; ++num_gpus) { LOG(INFO) << "Testing " << num_gpus << " x " << num_gpus; @@ -571,7 +519,7 @@ TEST(GpuTopology, TestComputeTrees1) { // ComputeTreesTest with Kernighan-Lin TEST(GpuTopology, TestComputeTrees2) { std::mt19937 gen(1); - float alpha = 0.7; + float alpha = 0.7; bool backtrack = false; // Do 5 randomized tests per GPU count from 2 to 16 for (int num_gpus = 2; num_gpus <= 16; ++num_gpus) { @@ -583,35 +531,25 @@ TEST(GpuTopology, TestComputeTrees2) { } TEST(GpuTopology, TestPermuteMatrix) { - std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, - 2, 0, 3, 2, 1, 3, 1, 1, - 2, 3, 0, 3, 1, 1, 2, 1, - 3, 2, 3, 0, 1, 1, 1, 2, - 3, 1, 1, 1, 0, 2, 2, 3, - 1, 3, 1, 1, 2, 0, 3, 2, - 1, 1, 2, 1, 2, 3, 0, 3, - 1, 1, 1, 2, 3, 2, 3, 0}; + std::vector W = {0, 2, 2, 3, 3, 1, 1, 1, 2, 0, 3, 2, 1, 3, 1, 1, 2, 3, 0, 3, 1, 1, + 2, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 1, 1, 0, 2, 2, 3, 1, 3, 1, 1, + 2, 0, 3, 2, 1, 1, 2, 1, 2, 3, 0, 3, 1, 1, 1, 2, 3, 2, 3, 0}; std::vector P1 = {0, 1, 2, 3, 4, 5, 6, 7}; - std::vector A(8*8, 0); + std::vector A(8 * 8, 0); PermuteMatrix(W, P1, &A); - for (unsigned i=0; i < W.size(); ++i) + for (unsigned i = 0; i < W.size(); ++i) ASSERT_EQ(A[i], W[i]); } TEST(GpuTopology, TestKernighanLin1) { - std::vector W = {0, 1, 2, 3, 2, 4, - 1, 0, 1, 4, 2, 1, - 2, 1, 0, 3, 2, 1, - 3, 4, 3, 0, 4, 3, - 2, 2, 2, 4, 0, 2, - 4, 1, 1, 3, 2, 0}; + std::vector W = {0, 1, 2, 3, 2, 4, 1, 0, 1, 4, 2, 1, 2, 1, 0, 3, 2, 1, + 3, 4, 3, 0, 4, 3, 2, 2, 2, 4, 0, 2, 4, 1, 1, 3, 2, 0}; std::vector P(6, 0); std::vector> cluster_pairs; int num_partitions = 1; std::mt19937 gen(1); - bool stop = mxnet::kvstore::KernighanLin(W, &P, &num_partitions, - &cluster_pairs, &gen); + bool stop = mxnet::kvstore::KernighanLin(W, &P, &num_partitions, &cluster_pairs, &gen); std::vector> correct_pairs; correct_pairs.push_back(std::pair(0, 1)); @@ -630,26 +568,19 @@ TEST(GpuTopology, TestKernighanLin1) { error++; } EXPECT_TRUE(error == 0 || error == P.size()) - << "Where real value: " << error - << " not equal neither: " << 0 - << " nor: " << P.size() << "."; + << "Where real value: " << error << " not equal neither: " << 0 << " nor: " << P.size() + << "."; } TEST(GpuTopology, TestKernighanLin2) { - std::vector W = {0, 1, 0, 0, 1, 1, 0, 0, - 1, 0, 0, 0, 1, 1, 0, 0, - 0, 0, 0, 1, 0, 1, 1, 1, - 0, 0, 1, 0, 0, 0, 1, 1, - 1, 1, 0, 0, 0, 1, 0, 0, - 1, 1, 1, 0, 1, 0, 0, 0, - 0, 0, 1, 1, 0, 0, 0, 1, - 0, 0, 1, 1, 0, 0, 1, 0}; + std::vector W = {0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, + 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0}; std::vector P(8, 0); std::vector> cluster_pairs; int num_partitions = 1; std::mt19937 gen(1); - bool stop = mxnet::kvstore::KernighanLin(W, &P, &num_partitions, - &cluster_pairs, &gen); + bool stop = mxnet::kvstore::KernighanLin(W, &P, &num_partitions, &cluster_pairs, &gen); std::vector> correct_pairs; correct_pairs.push_back(std::pair(0, 1)); @@ -668,9 +599,8 @@ TEST(GpuTopology, TestKernighanLin2) { error++; } EXPECT_TRUE(error == 0 || error == P.size()) - << "Where 
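Aside: the KernighanLin tests above verify that the returned labeling splits the vertices into two equal halves. The quantity a Kernighan-Lin pass tries to minimize is the weight of edges crossing the partition; a standalone sketch of that objective, with made-up weights:

#include <iostream>
#include <vector>

// Total weight of edges whose endpoints carry different partition ids.
float cut_weight(const std::vector<float>& W, const std::vector<int>& P) {
  const int n = static_cast<int>(P.size());
  float cut = 0.f;
  for (int i = 0; i < n; ++i)
    for (int j = i + 1; j < n; ++j)
      if (P[i] != P[j])
        cut += W[i * n + j];
  return cut;
}

int main() {
  const std::vector<float> W = {0, 5, 1, 1,
                                5, 0, 1, 1,
                                1, 1, 0, 5,
                                1, 1, 5, 0};
  std::cout << cut_weight(W, {0, 0, 1, 1}) << "\n";  // 4: heavy edges kept inside
  std::cout << cut_weight(W, {0, 1, 0, 1}) << "\n";  // 12: both heavy edges cut
}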
real value: " << error - << " not equal neither: " << 0 - << " nor: " << P.size() << "."; + << "Where real value: " << error << " not equal neither: " << 0 << " nor: " << P.size() + << "."; } #endif // MXNET_USE_CUDA diff --git a/tests/cpp/misc/base.cc b/tests/cpp/misc/base.cc index b560f02a2a96..430ff693737f 100644 --- a/tests/cpp/misc/base.cc +++ b/tests/cpp/misc/base.cc @@ -27,20 +27,20 @@ using namespace std; * Test that different Context have different hash values */ TEST(ContextHashTest, ContextHashUnique) { - set hashes; - size_t collision_count = 0; - size_t total = 0; - for (size_t dev_type = 0; dev_type < 32; ++dev_type) { - for (size_t dev_id = 0; dev_id < 64; ++dev_id) { - auto ctx = Context::Create(static_cast(dev_type), dev_id); - size_t res = std::hash()(ctx); - auto insert_res = hashes.insert(res); - if (!insert_res.second) - ++collision_count; - ++total; - } + set hashes; + size_t collision_count = 0; + size_t total = 0; + for (size_t dev_type = 0; dev_type < 32; ++dev_type) { + for (size_t dev_id = 0; dev_id < 64; ++dev_id) { + auto ctx = Context::Create(static_cast(dev_type), dev_id); + size_t res = std::hash()(ctx); + auto insert_res = hashes.insert(res); + if (!insert_res.second) + ++collision_count; + ++total; } - double collision = collision_count / static_cast(total); - cout << "mxnet::Context std::hash collision ratio: " << collision << endl; - EXPECT_LE(collision, 0.04); + } + double collision = collision_count / static_cast(total); + cout << "mxnet::Context std::hash collision ratio: " << collision << endl; + EXPECT_LE(collision, 0.04); } diff --git a/tests/cpp/operator/activation_perf.cc b/tests/cpp/operator/activation_perf.cc index 0dfefe55f132..ad5f26f101f0 100644 --- a/tests/cpp/operator/activation_perf.cc +++ b/tests/cpp/operator/activation_perf.cc @@ -32,7 +32,7 @@ using namespace mxnet; typedef std::vector > kwargs_t; -const kwargs_t basic_activation_args = { }; +const kwargs_t basic_activation_args = {}; /*! 
* \brief Generic bidirectional sanity test @@ -41,25 +41,24 @@ TEST(ACTIVATION_PERF, ExecuteBidirectional) { using namespace std; mxnet::TShape shape({5, 5}); vector activations = { - "relu", - "sigmoid", - "log_sigmoid", - "mish", - "tanh", - "softrelu", - "softsign" - }; + "relu", "sigmoid", "log_sigmoid", "mish", "tanh", "softrelu", "softsign"}; for (const string& activation : activations) { kwargs_t activation_args = {{"act_type", activation}}; test::op::CoreOperatorRunner runner; - runner.RunBidirectional(false, { shape }, test::op::CoreOpExecutor::ArgsWithOpName( - activation_args, "Activation", "_backward_Activation"), 1); + runner.RunBidirectional(false, + {shape}, + test::op::CoreOpExecutor::ArgsWithOpName( + activation_args, "Activation", "_backward_Activation"), + 1); } for (const string& activation : activations) { kwargs_t activation_args = {{"act_type", activation}}; test::op::CoreOperatorRunner runner; - runner.RunBidirectional(true, { shape }, test::op::CoreOpExecutor::ArgsWithOpName( - activation_args, "Activation", "_backward_Activation"), 1); + runner.RunBidirectional(true, + {shape}, + test::op::CoreOpExecutor::ArgsWithOpName( + activation_args, "Activation", "_backward_Activation"), + 1); } } @@ -70,29 +69,23 @@ TEST(ACTIVATION_PERF, TimingCPU) { kwargs_t kwargs = basic_activation_args; // Which math function is arbitrary since it will have roughly constant timing among approaches kwargs.push_back({"act_type", "tanh"}); - kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", - "_backward_Activation"); + kwargs = + test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", "_backward_Activation"); mxnet::TShape shape({10, 10, 10, 10}); test::op::CoreOperatorRunner runner; - runner.RunBidirectional(false, { shape }, kwargs, 1); + runner.RunBidirectional(false, {shape}, kwargs, 1); - std::vector shapes; + std::vector shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest("Activation Operator CPU", false, false, kwargs, 2, 10, { shape }); + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest("Activation Operator CPU", false, false, kwargs, 2, 10, {shape}); } } @@ -104,21 +97,15 @@ TEST(ACTIVATION_PERF, TimingGPU) { kwargs_t kwargs = basic_activation_args; // Which math function is arbitrary since it will have roughly constant timing among approaches kwargs.push_back({"act_type", "tanh"}); - kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", - "_backward_Activation"); + kwargs = + test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", "_backward_Activation"); mxnet::TShape shape({10, 10, 10, 10}); test::op::CoreOperatorRunner runner; - runner.RunBidirectional(true, { shape }, kwargs, 1); - std::vector shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest("Activation Operator GPU", true, false, kwargs, 2, 10, { shape }); + runner.RunBidirectional(true, {shape}, kwargs, 1); + std::vector shapes = { + {1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; + for (const mxnet::TShape& shape : shapes) { + 
runner.TimingTest("Activation Operator GPU", true, false, kwargs, 2, 10, {shape}); } } #endif // MXNET_USE_CUDA == 1 - diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index e66b0b7696c6..39d039c0b55c 100644 --- a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -22,7 +22,7 @@ * \file batchnorm_test.cc * \brief batchnorm operator unit tests and utility functions * \author Chris Olivier -*/ + */ #include #include @@ -40,23 +40,23 @@ using namespace mxnet; #if !SIMPLE_DIMENSIONS static constexpr int BATCH_SIZE = 5; -static constexpr int CHANNELS = 3; -static constexpr int DEPTH = 2; -static constexpr int DH = 2; -static constexpr int DW = 3; +static constexpr int CHANNELS = 3; +static constexpr int DEPTH = 2; +static constexpr int DH = 2; +static constexpr int DW = 3; #else static constexpr int BATCH_SIZE = 1; -static constexpr int CHANNELS = 1; -static constexpr int DEPTH = 1; -static constexpr int DH = 3; -static constexpr int DW = 2; +static constexpr int CHANNELS = 1; +static constexpr int DEPTH = 1; +static constexpr int DH = 3; +static constexpr int DW = 2; #endif static constexpr int TIMING_BATCH_SIZE = 128; -static constexpr int TIMING_CHANNELS = 3; -static constexpr int TIMING_DEPTH = 2; -static constexpr int TIMING_DH = 28; -static constexpr int TIMING_DW = 28; +static constexpr int TIMING_CHANNELS = 3; +static constexpr int TIMING_DEPTH = 2; +static constexpr int TIMING_DH = 28; +static constexpr int TIMING_DW = 28; #define PRT(__lbl$, __var$) \ test::print(ctx.run_ctx, &(std::cout << (__lbl$) << ": "), (__var$), true) @@ -65,25 +65,35 @@ static constexpr int TIMING_DW = 28; * \brief Forward */ enum ForwardInputs { - /* in_data */ kForInData, kForGamma, kForBeta, - /* aux_states */ kForMovingMean, kForMovingVar + /* in_data */ kForInData, + kForGamma, + kForBeta, + /* aux_states */ kForMovingMean, + kForMovingVar }; enum ForwardOutputs { - /* outputs */ kForOutData , kForOutMean, kForOutVar + /* outputs */ kForOutData, + kForOutMean, + kForOutVar }; /*! 
* \brief Backward */ enum BackwardInputs { - /* out_grad */ bwd_out_grad_Grad, - /* out_data */ bwd_out_data_Mean, bwd_out_data_Var, - /* in_data */ bwd_in_data_Data, bwd_in_data_Gamma, bwd_in_data_Beta, - /* aux_states */ bwd_aux_states_MovingMean, bwd_aux_states_MovingVar + /* out_grad */ bwd_out_grad_Grad, + /* out_data */ bwd_out_data_Mean, + bwd_out_data_Var, + /* in_data */ bwd_in_data_Data, + bwd_in_data_Gamma, + bwd_in_data_Beta, + /* aux_states */ bwd_aux_states_MovingMean, + bwd_aux_states_MovingVar }; enum BackwardOutputs { - /* in_grad */ bwd_in_grad_Data /* Original input data */, - /* weight, bias*/ bwd_in_grad_Gamma, bwd_in_grad_Beta + /* in_grad */ bwd_in_grad_Data /* Original input data */, + /* weight, bias*/ bwd_in_grad_Gamma, + bwd_in_grad_Beta }; /** @@ -104,59 +114,62 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { public: using Super::ctx; - BNOperatorExecutor(const bool isGPU, const mxnet::TShape& inputShape, + BNOperatorExecutor(const bool isGPU, + const mxnet::TShape& inputShape, const test::op::kwargs_t& kwargs, const bool hasWeightAndBias = false) - : test::op::CoreOpExecutor(isGPU, { inputShape }) - , hasWeightAndBias_(hasWeightAndBias) { + : test::op::CoreOpExecutor(isGPU, {inputShape}), + hasWeightAndBias_(hasWeightAndBias) { param_.Init(kwargs); } - const NDArray *GetForwardInArray(const ForwardInputs idx) const { - const std::vector &arrs = Super::inputs(); + const NDArray* GetForwardInArray(const ForwardInputs idx) const { + const std::vector& arrs = Super::inputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetForwardOutArray(const ForwardOutputs idx) const { - const std::vector &arrs = Super::outputs(); + const NDArray* GetForwardOutArray(const ForwardOutputs idx) const { + const std::vector& arrs = Super::outputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardInArray(const BackwardInputs idx) { - const std::vector &arrs = Super::bwd_inputs(); + const NDArray* GetBackwardInArray(const BackwardInputs idx) { + const std::vector& arrs = Super::bwd_inputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - const NDArray *GetBackwardOutArray(const BackwardOutputs idx) const { - const std::vector &arrs = Super::bwd_outputs(); + const NDArray* GetBackwardOutArray(const BackwardOutputs idx) const { + const std::vector& arrs = Super::bwd_outputs(); CHECK_LT(idx, arrs.size()); return &arrs[idx]; } - NDArray *GetArray(const ForwardInputs idx) { - return const_cast(GetForwardInArray(idx)); + NDArray* GetArray(const ForwardInputs idx) { + return const_cast(GetForwardInArray(idx)); } - NDArray *GetArray(const ForwardOutputs idx) { - return const_cast(GetForwardOutArray(idx)); + NDArray* GetArray(const ForwardOutputs idx) { + return const_cast(GetForwardOutArray(idx)); } - NDArray *GetArray(const BackwardOutputs idx) { - return const_cast(GetBackwardOutArray(idx)); + NDArray* GetArray(const BackwardOutputs idx) { + return const_cast(GetBackwardOutArray(idx)); } - NDArray *GetArray(const BackwardInputs idx) { - return const_cast(GetBackwardInArray(idx)); + NDArray* GetArray(const BackwardInputs idx) { + return const_cast(GetBackwardInArray(idx)); } - inline const TBlob& Blob(const NDArray *arr) { return arr->data(); } + inline const TBlob& Blob(const NDArray* arr) { + return arr->data(); + } - template + template const TBlob& GetBlob(const EnumType idx) const { - return const_cast *>(this)->GetArray(idx)->data(); + return const_cast*>(this)->GetArray(idx)->data(); } void resetForward() override { 
@@ -179,30 +192,26 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { double val = 0; test::patternFill(ctx().run_ctx, &GetBlob(kForInData), [&val]() -> double { return val += 1; }); - MSHADOW_TYPE_SWITCH( - GetBlob(kForGamma).type_flag_, - DTypeX, { - const TBlob& blob = GetBlob(kForGamma); - test::fill(ctx().run_ctx, blob, DTypeX(1)); - if (hasWeightAndBias_) { - if (blob.size(0) > 1) { - blob.dptr()[1] = DTypeX(3); - } + MSHADOW_TYPE_SWITCH(GetBlob(kForGamma).type_flag_, DTypeX, { + const TBlob& blob = GetBlob(kForGamma); + test::fill(ctx().run_ctx, blob, DTypeX(1)); + if (hasWeightAndBias_) { + if (blob.size(0) > 1) { + blob.dptr()[1] = DTypeX(3); } - }); - MSHADOW_TYPE_SWITCH( - GetBlob(kForBeta).type_flag_, - DTypeX, { - const TBlob& blob = GetBlob(kForBeta); - if (!hasWeightAndBias_) { - test::fill(ctx().run_ctx, blob, DTypeX(0)); - } else { // This will cause forward pass check to fail when calculating sum == 0 - test::fill(ctx().run_ctx, blob, DTypeX(1)); - if (blob.size(0) > 0) { - blob.dptr()[0] = DTypeX(3); - } + } + }); + MSHADOW_TYPE_SWITCH(GetBlob(kForBeta).type_flag_, DTypeX, { + const TBlob& blob = GetBlob(kForBeta); + if (!hasWeightAndBias_) { + test::fill(ctx().run_ctx, blob, DTypeX(0)); + } else { // This will cause forward pass check to fail when calculating sum == 0 + test::fill(ctx().run_ctx, blob, DTypeX(1)); + if (blob.size(0) > 0) { + blob.dptr()[0] = DTypeX(3); } - }); + } + }); // Init the moving data (all mean = 0, all var = 1) test::try_fill(ctx().run_ctx, &GetBlob(kForMovingMean), 0); @@ -216,34 +225,29 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { // Join forward input and in_data array double val = 0; - test::patternFill(ctx().run_ctx, &GetBlob(bwd_in_data_Data), [&val]() -> double { - return val += 1; + test::patternFill( + ctx().run_ctx, &GetBlob(bwd_in_data_Data), [&val]() -> double { return val += 1; }); + + MSHADOW_TYPE_SWITCH(GetBlob(bwd_in_data_Gamma).type_flag_, DTypeX, { + const TBlob& blob = GetBlob(bwd_in_data_Gamma); + test::fill(ctx().run_ctx, blob, DTypeX(1)); + if (hasWeightAndBias_) { + if (blob.size(0) > 1) { + blob.dptr()[1] = DTypeX(3); + } + } }); - - MSHADOW_TYPE_SWITCH( - GetBlob(bwd_in_data_Gamma).type_flag_, - DTypeX, { - const TBlob& blob = GetBlob(bwd_in_data_Gamma); + MSHADOW_TYPE_SWITCH(GetBlob(bwd_in_data_Beta).type_flag_, DTypeX, { + const TBlob& blob = GetBlob(bwd_in_data_Beta); + if (!hasWeightAndBias_) { + test::fill(ctx().run_ctx, blob, DTypeX(0)); + } else { // This will cause forward pass check to fail when calculating sum == 0 test::fill(ctx().run_ctx, blob, DTypeX(1)); - if (hasWeightAndBias_) { - if (blob.size(0) > 1) { - blob.dptr()[1] = DTypeX(3); - } + if (blob.size(0) > 0) { + blob.dptr()[0] = DTypeX(3); } - }); - MSHADOW_TYPE_SWITCH( - GetBlob(bwd_in_data_Beta).type_flag_, - DTypeX, { - const TBlob& blob = GetBlob(bwd_in_data_Beta); - if (!hasWeightAndBias_) { - test::fill(ctx().run_ctx, blob, DTypeX(0)); - } else { // This will cause forward pass check to fail when calculating sum == 0 - test::fill(ctx().run_ctx, blob, DTypeX(1)); - if (blob.size(0) > 0) { - blob.dptr()[0] = DTypeX(3); - } - } - }); + } + }); // Join aux arrays test::try_fill(ctx().run_ctx, &GetBlob(bwd_aux_states_MovingMean), 0); @@ -253,8 +257,8 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { test::try_fill(ctx().run_ctx, &GetBlob(bwd_out_data_Var), 1.0); val = -.001; - test::patternFill(ctx().run_ctx, &GetBlob(bwd_out_grad_Grad), [&val]() -> double { - return val += 0.01; }); + test::patternFill( 
+ ctx().run_ctx, &GetBlob(bwd_out_grad_Grad), [&val]() -> double { return val += 0.01; }); } const bool hasWeightAndBias_; // This will cause forward pass validation to fail @@ -272,7 +276,7 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor { * */ /*! \brief Validate batch norm test outputs */ -template +template class BatchNormValidator : public test::op::Validator { typedef test::op::Validator Super; @@ -280,13 +284,13 @@ class BatchNormValidator : public test::op::Validator { BatchNormValidator() = delete; // NOLINT /*! \brief Check batch norm output - 1D */ - static void checkBatchNorm1D(const TBlob *blob) { + static void checkBatchNorm1D(const TBlob* blob) { const size_t dim = static_cast(blob->ndim()); CHECK_EQ(dim, 3U); - const size_t num = blob->shape_[0]; // batch size + const size_t num = blob->shape_[0]; // batch size const size_t channels = blob->shape_[1]; - const size_t length = blob->shape_[2]; + const size_t length = blob->shape_[2]; size_t itemCount = 0; @@ -313,15 +317,13 @@ class BatchNormValidator : public test::op::Validator { // expect zero mean EXPECT_NEAR(0, sum, kErrorBound); if (!Super::isNear(AccReal(0), sum, kErrorBound)) { - LOG(WARNING) << "Sum is not close enough to zero: " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Sum is not close enough to zero: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } // expect unit variance EXPECT_NEAR(1, var, kErrorBound); if (!Super::isNear(AccReal(1), var, kErrorBound)) { - LOG(WARNING) << "Variance is not close enough to 1: " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Variance is not close enough to 1: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } } @@ -329,14 +331,14 @@ class BatchNormValidator : public test::op::Validator { } /*! \brief Check batch norm output - 2D */ - static void checkBatchNorm2D(const TBlob *blob) { + static void checkBatchNorm2D(const TBlob* blob) { const size_t dim = static_cast(blob->ndim()); CHECK_EQ(dim, 4U); - const size_t num = blob->shape_[0]; // batch size + const size_t num = blob->shape_[0]; // batch size const size_t channels = blob->shape_[1]; - const size_t height = blob->shape_[2]; - const size_t width = blob->shape_[3]; + const size_t height = blob->shape_[2]; + const size_t width = blob->shape_[3]; size_t itemCount = 0, nonZero = 0; @@ -370,16 +372,14 @@ class BatchNormValidator : public test::op::Validator { // expect zero mean EXPECT_NEAR(0, sum, kErrorBound); if (!Super::isNear(AccReal(0), sum, kErrorBound)) { - LOG(WARNING) << "Sum is not close enough to zero: " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Sum is not close enough to zero: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; test::print(RunContext(), &(std::cerr << "Mean problem:" << std::endl), *blob); } // expect unit variance EXPECT_NEAR(1, var, kErrorBound); if (!Super::isNear(AccReal(1), var, kErrorBound)) { - LOG(WARNING) << "Variance is not close enough to 1: " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Variance is not close enough to 1: " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; test::print(RunContext(), &(std::cerr << "Variance problem:" << std::endl), *blob); } @@ -388,14 +388,14 @@ class BatchNormValidator : public test::op::Validator { } /*! 
\brief Check batch norm output - 3D */ - static void checkBatchNorm3D(const TBlob *blob) { + static void checkBatchNorm3D(const TBlob* blob) { const size_t dim = static_cast(blob->ndim()); CHECK_EQ(dim, 5U); - const size_t num = blob->shape_[0]; // batch size + const size_t num = blob->shape_[0]; // batch size const size_t channels = blob->shape_[1]; - const size_t depth = blob->shape_[2]; - const size_t height = blob->shape_[3]; - const size_t width = blob->shape_[4]; + const size_t depth = blob->shape_[2]; + const size_t height = blob->shape_[3]; + const size_t width = blob->shape_[4]; size_t itemCount = 0; @@ -406,8 +406,8 @@ class BatchNormValidator : public test::op::Validator { for (size_t k = 0; k < height; ++k) { for (size_t l = 0; l < width; ++l) { const AccReal data = test::data_at(blob, {i, j, d, k, l}); - sum = sum + data; - var = var + (data * data); + sum = sum + data; + var = var + (data * data); ++itemCount; } } @@ -425,15 +425,13 @@ class BatchNormValidator : public test::op::Validator { // expect zero mean EXPECT_NEAR(0, sum, kErrorBound); if (!Super::isNear(AccReal(0), sum, kErrorBound)) { - LOG(WARNING) << "Sum is not close enough to zero " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Sum is not close enough to zero " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } // expect unit variance EXPECT_NEAR(1, var, kErrorBound); if (!Super::isNear(AccReal(1), var, kErrorBound)) { - LOG(WARNING) << "Variance is not close enough to 1 " - << saveSum << " (" << sum << "), " + LOG(WARNING) << "Variance is not close enough to 1 " << saveSum << " (" << sum << "), " << saveVar << " (" << var << ")"; } } @@ -447,7 +445,7 @@ class BatchNormValidator : public test::op::Validator { const EnumType idx, bool print = false) { test::CAccessAsCPU cpu1(i1.ctx().run_ctx, i1.GetBlob(idx), false), - cpu2(i2.ctx().run_ctx, i2.GetBlob(idx), false); + cpu2(i2.ctx().run_ctx, i2.GetBlob(idx), false); const TBlob& b1 = cpu1(); const TBlob& b2 = cpu2(); if (print && test::debug_output) { @@ -463,9 +461,9 @@ class BatchNormValidator : public test::op::Validator { } /*! \brief Check batch norm output */ - template + template static void validateForward(const RunContext& run_ctx, const BNOperatorProp& data) { - const TBlob &outputBlob = data.GetBlob(ForwardOutputs::kForOutData); + const TBlob& outputBlob = data.GetBlob(ForwardOutputs::kForOutData); if (test::debug_output) { test::print(run_ctx, &(std::cout << "Fwd Output Blob:"), outputBlob, true, true); } @@ -487,20 +485,20 @@ class BatchNormValidator : public test::op::Validator { }); } -#define TEST_ISTRUE(__args$) \ - do { \ - bool _rc; \ +#define TEST_ISTRUE(__args$) \ + do { \ + bool _rc; \ EXPECT_TRUE((_rc = (__args$))); \ - if (!_rc) { \ - rc = false; \ - } \ + if (!_rc) { \ + rc = false; \ + } \ } while (0) /*! 
\brief Compare entire operator data between two test sets */ - template + template static bool compare( - const test::op::OpInfo>& info_1, - const test::op::OpInfo>& info_2) { + const test::op::OpInfo>& info_1, + const test::op::OpInfo>& info_2) { bool rc = true; // Input TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, ForwardInputs::kForInData)); @@ -517,13 +515,10 @@ class BatchNormValidator : public test::op::Validator { #endif if (!info_2.prop_->getParam().use_global_stats) { - TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, - BackwardInputs::bwd_out_data_Mean)); - TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, - BackwardInputs::bwd_out_data_Var)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, BackwardInputs::bwd_out_data_Mean)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, BackwardInputs::bwd_out_data_Var)); // InGrad - TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, - BackwardOutputs::bwd_in_grad_Data)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, BackwardOutputs::bwd_in_grad_Data)); #if 0 TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, BackwardOutputs::bwd_in_grad_Gamma)); @@ -531,8 +526,7 @@ class BatchNormValidator : public test::op::Validator { BackwardOutputs::bwd_in_grad_Beta)); #endif // OutGrad - TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, - BackwardInputs::bwd_out_grad_Grad)); + TEST_ISTRUE(compare(*info_1.executor_, *info_2.executor_, BackwardInputs::bwd_out_grad_Grad)); } return rc; } @@ -549,24 +543,22 @@ class BatchNormValidator : public test::op::Validator { * */ static const test::op::kwargs_t blank_kwargs; -static const test::op::kwargs_t blank_kwargs_nocudnn = { - {"cudnn_off", "True"} }; -static const test::op::kwargs_t nonfixgamma_kwargs = { - {"fix_gamma", "False"} }; -static const test::op::kwargs_t nonfixgamma_kwargs_nocudnn = { - {"fix_gamma", "False"}, {"cudnn_off", "True"} }; -static const test::op::kwargs_t useglobalstats_kwargs = { - {"use_global_stats", "True"} }; -static const test::op::kwargs_t useglobalstats_kwargs_nocudnn = { - {"use_global_stats", "True"}, {"cudnn_off", "True"} }; -static const test::op::kwargs_t nfs_ugs_kwargs = { - {"fix_gamma", "False"}, {"use_global_stats", "True"}}; -static const test::op::kwargs_t nfs_ugs_kwargs_nocudnn = { - {"fix_gamma", "False"}, {"use_global_stats", "True"}, {"cudnn_off", "True"} }; +static const test::op::kwargs_t blank_kwargs_nocudnn = {{"cudnn_off", "True"}}; +static const test::op::kwargs_t nonfixgamma_kwargs = {{"fix_gamma", "False"}}; +static const test::op::kwargs_t nonfixgamma_kwargs_nocudnn = {{"fix_gamma", "False"}, + {"cudnn_off", "True"}}; +static const test::op::kwargs_t useglobalstats_kwargs = {{"use_global_stats", "True"}}; +static const test::op::kwargs_t useglobalstats_kwargs_nocudnn = {{"use_global_stats", "True"}, + {"cudnn_off", "True"}}; +static const test::op::kwargs_t nfs_ugs_kwargs = {{"fix_gamma", "False"}, + {"use_global_stats", "True"}}; +static const test::op::kwargs_t nfs_ugs_kwargs_nocudnn = {{"fix_gamma", "False"}, + {"use_global_stats", "True"}, + {"cudnn_off", "True"}}; #if !DISABLE_VALIDATION static bool isUGS(const test::op::kwargs_t& kwargs) { - for (const auto & kwarg : kwargs) { + for (const auto& kwarg : kwargs) { if (!kwarg.first.compare("use_global_stats")) { return kwarg.second.compare("True") == 0; } @@ -585,9 +577,12 @@ static bool isUGS(const test::op::kwargs_t& kwargs) { * __/ | | | * |___/ |_| */ -template -static StreamType& _DBPRT(const RunContext& 
run_ctx, const char *label, - StreamType *os, const OperatorExecutor& obj, const BlobType type) { +template +static StreamType& _DBPRT(const RunContext& run_ctx, + const char* label, + StreamType* os, + const OperatorExecutor& obj, + const BlobType type) { *os << label << ": "; test::print(RunContext(), os, test::CAccessAsCPU(run_ctx, obj.GetBlob(type), false)()); return *os; @@ -595,10 +590,10 @@ static StreamType& _DBPRT(const RunContext& run_ctx, const char *label, #define DBPRT(__os, __obj, __type$) _DBPRT(run_ctx, #__type$, __os, __obj, __type$) -template -static StreamType& dumpF(StreamType *os, +template +static StreamType& dumpF(StreamType* os, const test::op::OpInfo& prop, - const size_t x = 0, + const size_t x = 0, const bool force = test::debug_output) { if (force) { *os << std::endl; @@ -622,10 +617,10 @@ static StreamType& dumpF(StreamType *os, return *os; } -template -static StreamType& dumpB(StreamType *os, +template +static StreamType& dumpB(StreamType* os, const test::op::OpInfo& prop, - const size_t x = 0, + const size_t x = 0, const bool force = test::debug_output) { if (force) { *os << std::endl; @@ -659,24 +654,26 @@ static StreamType& dumpB(StreamType *os, * */ /*! \brief Test batch norm operator forward pass */ -template +template static test::op::OpInfo TestBatchNormOperatorForward( - bool isGPU, - const mxnet::TShape& inputShape, - const std::vector >& kwargs, - const size_t count = 1) { + bool isGPU, + const mxnet::TShape& inputShape, + const std::vector>& kwargs, + const size_t count = 1) { #if MXNET_USE_CUDA if (isGPU && !test::unitTestsWithCuda) { LOG(INFO) << "GPU not found, running test as non-GPU"; } #else - isGPU = false; + isGPU = false; #endif - test::op::OpInfo info = test::op::createOpAndInfoF< - OperatorProp, OperatorExecutor>( - OperatorExecutor::ArgsWithOpName(kwargs, "BatchNorm", "_backward_BatchNorm"), - isGPU, inputShape, kwargs); + test::op::OpInfo info = + test::op::createOpAndInfoF( + OperatorExecutor::ArgsWithOpName(kwargs, "BatchNorm", "_backward_BatchNorm"), + isGPU, + inputShape, + kwargs); info.executor_->initForward(*info.prop_, &info.in_type_); @@ -685,8 +682,10 @@ static test::op::OpInfo TestBatchNormOperatorFor #if !DISABLE_VALIDATION if (!isUGS(kwargs)) { BatchNormValidator::validateForward( - info.executor_->ctx().run_ctx, *info.executor_); + typename OperatorExecutor::AccRealType>::validateForward(info.executor_ + ->ctx() + .run_ctx, + *info.executor_); } #endif @@ -694,10 +693,10 @@ static test::op::OpInfo TestBatchNormOperatorFor } /*! 
\brief Test batch norm operator backward pass */ -template +template static test::op::OpInfo runOperatorBackward( - test::op::OpInfo *info, - const size_t count = 1) { + test::op::OpInfo* info, + const size_t count = 1) { info->executor_->initBackward(*info->prop_, &info->in_type_); info->executor_->backward(count); @@ -706,25 +705,25 @@ static test::op::OpInfo runOperatorBackward( static constexpr size_t CYCLE_COUNT = 3; -template +template static test::op::OpInfoPair testForwardAndBackward( const bool isGPU1, const bool isGPU2, - const mxnet::TShape &inputShape, + const mxnet::TShape& inputShape, const test::op::kwargs_t& kwargs, - const size_t count = 1, + const size_t count = 1, const size_t cycleCount = CYCLE_COUNT) { test::op::OpInfo info_1 = - TestBatchNormOperatorForward(isGPU1, inputShape, - kwargs, count); + TestBatchNormOperatorForward( + isGPU1, inputShape, kwargs, count); test::op::OpInfo info_2 = - TestBatchNormOperatorForward(isGPU2, inputShape, - kwargs, count); + TestBatchNormOperatorForward( + isGPU2, inputShape, kwargs, count); size_t thisCount = 0; - using DType = typename OperatorExecutor::DataType; + using DType = typename OperatorExecutor::DataType; using AccReal = typename OperatorExecutor::AccRealType; do { @@ -743,9 +742,8 @@ static test::op::OpInfoPair test // Check that everything is the same after the forward pass const bool b1 = BatchNormValidator::compare(info_1, info_2); - const bool b2 = BatchNormValidator::compare(*info_1.executor_, - *info_2.executor_, - kForInData, false); + const bool b2 = BatchNormValidator::compare( + *info_1.executor_, *info_2.executor_, kForInData, false); if (!b1 || !b2) { dumpF(&std::cout, info_1, 1, true); dumpF(&std::cout, info_2, 2, true); @@ -774,23 +772,17 @@ static test::op::OpInfoPair test } } while (++thisCount < cycleCount); - return { info_1, info_2 }; + return {info_1, info_2}; } -template -static test::op::OpInfoPair -testForwardAndBackward(const bool isGPU, - const mxnet::TShape &inputShape, - const test::op::kwargs_t kwargs, - const size_t count = 1, - const size_t cycleCount = CYCLE_COUNT -) { +template +static test::op::OpInfoPair testForwardAndBackward( + const bool isGPU, + const mxnet::TShape& inputShape, + const test::op::kwargs_t kwargs, + const size_t count = 1, + const size_t cycleCount = CYCLE_COUNT) { return testForwardAndBackward( - isGPU, - isGPU, - inputShape, - kwargs, - count, - cycleCount); + isGPU, isGPU, inputShape, kwargs, count, cycleCount); } /** @@ -811,28 +803,30 @@ struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp { params_.Init(kwargs, dmlc::parameter::kAllowUnknown); } - const mxnet::op::BatchNormParam& getParam() const { return params_; } + const mxnet::op::BatchNormParam& getParam() const { + return params_; + } mxnet::op::BatchNormParam params_; }; -template +template static test::op::OpInfoPair testBNForwardAndBackward2D(const bool isGPU, - const mxnet::TShape &inputShape, + const mxnet::TShape& inputShape, const test::op::kwargs_t& kwargs) { CHECK_EQ(inputShape.ndim(), 4); // V1 can only handle 2D return testForwardAndBackward( - isGPU, isGPU, inputShape, kwargs); + isGPU, isGPU, inputShape, kwargs); } -template +template static test::op::OpInfoPair testBNForwardAndBackward(const bool isGPU, - const mxnet::TShape &inputShape, + const mxnet::TShape& inputShape, const test::op::kwargs_t& kwargs) { return testForwardAndBackward( - isGPU, isGPU, inputShape, kwargs); + isGPU, isGPU, inputShape, kwargs); } /** @@ -846,11 +840,9 @@ testBNForwardAndBackward(const bool isGPU, * 
|___/ */ TEST(BATCH_NORM, TestSanityForwaredAndBackward) { - MSHADOW_REAL_TYPE_SWITCH_EX( - mshadow::kFloat32, - DType, AccReal, { + MSHADOW_REAL_TYPE_SWITCH_EX(mshadow::kFloat32, DType, AccReal, { testBNForwardAndBackward2D>( - false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); + false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); }); } @@ -864,46 +856,44 @@ TEST(BATCH_NORM, TestSanityForwaredAndBackward) { * * */ -static const std::vector v2_types = { - mshadow::kFloat32, - mshadow::kFloat64, - mshadow::kFloat16 -}; +static const std::vector v2_types = {mshadow::kFloat32, + mshadow::kFloat64, + mshadow::kFloat16}; TEST(BATCH_NORM, Test1DForward) { - for (const mshadow::TypeFlag type : v2_types) { + for (const mshadow::TypeFlag type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs); + false, {BATCH_SIZE, CHANNELS, DW}, blank_kwargs); }); } } TEST(BATCH_NORM, Test2DForward) { - for (int type : v2_types) { + for (int type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); + false, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); }); } } TEST(BATCH_NORM, Test3DForward) { - for (const mshadow::TypeFlag type : v2_types) { + for (const mshadow::TypeFlag type : v2_types) { MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { testBNForwardAndBackward>( - false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs); + false, {BATCH_SIZE, CHANNELS, DEPTH, DH, DW}, blank_kwargs); }); } } -template +template static void timingTest(const std::string& label, const bool isGPU, const bool stochastic, const test::op::kwargs_t& kwargs, const int dim = 0, - size_t count = 1) { + size_t count = 1) { std::cout << std::endl << std::flush; #ifdef NDEBUG @@ -930,10 +920,10 @@ static void timingTest(const std::string& label, do { batchSize = stochastic ? test::rangedRand(1U, BATCH_SIZE * 2U) : TIMING_BATCH_SIZE; - channels = stochastic ? test::rangedRand(1U, CHANNELS * 2U) : TIMING_CHANNELS; - depth = stochastic ? test::rangedRand(1U, DEPTH * 2U) : TIMING_DEPTH; - height = stochastic ? test::rangedRand(1U, DH * 2U) : TIMING_DH; - width = stochastic ? test::rangedRand(1U, DW * 2U) : TIMING_DW; + channels = stochastic ? test::rangedRand(1U, CHANNELS * 2U) : TIMING_CHANNELS; + depth = stochastic ? test::rangedRand(1U, DEPTH * 2U) : TIMING_DEPTH; + height = stochastic ? test::rangedRand(1U, DH * 2U) : TIMING_DH; + width = stochastic ? test::rangedRand(1U, DW * 2U) : TIMING_DW; } while (stochastic && (height * width) == 1U); const size_t D = dim ? 
dim - 1U : test::rangedRand(0U, 2U); @@ -942,21 +932,15 @@ static void timingTest(const std::string& label, switch (D) { case 0: info = TestBatchNormOperatorForward( - isGPU, - {batchSize, channels, width}, - kwargs, count); + isGPU, {batchSize, channels, width}, kwargs, count); break; case 1: info = TestBatchNormOperatorForward( - isGPU, - {batchSize, channels, height, width}, - kwargs, count); + isGPU, {batchSize, channels, height, width}, kwargs, count); break; case 2: info = TestBatchNormOperatorForward( - isGPU, - {batchSize, channels, depth, height, width}, - kwargs, count); + isGPU, {batchSize, channels, depth, height, width}, kwargs, count); break; default: CHECK(false) << "rangedRand() returned unexpected value"; @@ -965,36 +949,38 @@ static void timingTest(const std::string& label, runOperatorBackward(&info, count); timing += info.executor_->GetTiming(); } - } while (false); + } + while (false) + ; timing.print(&std::cout, label); std::cout << std::endl << std::flush; } #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 -#define GPU_TEST_DIMENSIONS 2 /* Only support 2D */ +#define GPU_TEST_DIMENSIONS 2 /* Only support 2D */ #else -#define GPU_TEST_DIMENSIONS 0 /* Allow stochastic */ -#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 +#define GPU_TEST_DIMENSIONS 0 /* Allow stochastic */ +#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 /*! \brief Stress-test random batch size/channels/dimension(s) */ TEST(BATCH_NORM, DISABLED_TestStochasticTiming_2D) { // Test is disabled due to suspected flakiness // https://github.com/apache/incubator-mxnet/issues/14411 - MSHADOW_REAL_TYPE_SWITCH_EX( - mshadow::kFloat32, DType, AccReal, - { - timingTest>( - "RANDOM: BatchNormCoreOpProp", false, true, - blank_kwargs_nocudnn, GPU_TEST_DIMENSIONS); }); + MSHADOW_REAL_TYPE_SWITCH_EX(mshadow::kFloat32, DType, AccReal, { + timingTest>( + "RANDOM: BatchNormCoreOpProp", false, true, blank_kwargs_nocudnn, GPU_TEST_DIMENSIONS); + }); #if MXNET_USE_CUDA if (test::unitTestsWithCuda) { - MSHADOW_REAL_TYPE_SWITCH_EX( - mshadow::kFloat32, DType, AccReal, - { - timingTest>( - "RANDOM: BatchNormCoreOpProp", true, true, - blank_kwargs_nocudnn, GPU_TEST_DIMENSIONS); }); + MSHADOW_REAL_TYPE_SWITCH_EX(mshadow::kFloat32, DType, AccReal, { + timingTest>( + "RANDOM: BatchNormCoreOpProp", + true, + true, + blank_kwargs_nocudnn, + GPU_TEST_DIMENSIONS); + }); } #endif } @@ -1010,50 +996,37 @@ TEST(BATCH_NORM, TestTiming_2D) { if (mxnet::test::quick_test) { THISCOUNT = 1; } -MSHADOW_REAL_TYPE_SWITCH_EX( - mshadow::kFloat32, DType, AccReal, { + MSHADOW_REAL_TYPE_SWITCH_EX(mshadow::kFloat32, DType, AccReal, { #if MXNET_USE_ONEDNN == 1 - // MKL - timingTest>( - "MKL BatchNormProp 2D", - false, false, - blank_kwargs_nocudnn, - 2, THISCOUNT); + // MKL + timingTest>( + "MKL BatchNormProp 2D", false, false, blank_kwargs_nocudnn, 2, THISCOUNT); #endif // MXNET_USE_ONEDNN == 1 - // CPU - test::ScopeSet disableMKL(&mxnet::op::batchnorm::disable_mkl, true); - timingTest>( - "BatchNormProp 2D", - false, false, - blank_kwargs_nocudnn, - 2, THISCOUNT); -#if MXNET_USE_CUDA - if (test::unitTestsWithCuda) { - // CUDA + // CPU + test::ScopeSet disableMKL(&mxnet::op::batchnorm::disable_mkl, true); timingTest>( - "BatchNormProp 2D", - true, false, - blank_kwargs_nocudnn, - 2, THISCOUNT); + "BatchNormProp 2D", false, false, blank_kwargs_nocudnn, 2, THISCOUNT); +#if MXNET_USE_CUDA + if (test::unitTestsWithCuda) { + // CUDA + timingTest>( + "BatchNormProp 2D", true, false, blank_kwargs_nocudnn, 2, THISCOUNT); #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR 
>= 5 - // CUDA-CUDNN - timingTest>( - "CUDNN BatchNormProp 2D", - true, false, - blank_kwargs, - 2, THISCOUNT); + // CUDA-CUDNN + timingTest>( + "CUDNN BatchNormProp 2D", true, false, blank_kwargs, 2, THISCOUNT); #endif - } + } #endif -}); + }); } #endif // _WIN32 -inline std::ostream& operator << (std::ostream& os, const test::op::kwargs_t& kwargs) { +inline std::ostream& operator<<(std::ostream& os, const test::op::kwargs_t& kwargs) { if (!kwargs.empty()) { os << "["; size_t count = 0; - for (const auto &item : kwargs) { + for (const auto& item : kwargs) { if (count++) { os << ", "; } @@ -1119,20 +1092,18 @@ TEST(BATCH_NORM, TestIterAll) { #ifndef _WIN32 TEST(BATCH_NORM, TestBackward3D) { - MSHADOW_REAL_TYPE_SWITCH_EX( - mshadow::kFloat32, DType, AccReal, - { - const mxnet::TShape inputShape({2, 3, 2, 3, 5}); - test::op::OpInfo> info = + MSHADOW_REAL_TYPE_SWITCH_EX(mshadow::kFloat32, DType, AccReal, { + const mxnet::TShape inputShape({2, 3, 2, 3, 5}); + test::op::OpInfo> info = TestBatchNormOperatorForward>( - false, inputShape, blank_kwargs); - info.executor_->initBackward(*info.prop_, &info.in_type_); - runOperatorBackward(&info); - }); + false, inputShape, blank_kwargs); + info.executor_->initBackward(*info.prop_, &info.in_type_); + runOperatorBackward(&info); + }); } #endif // _WIN32 -template +template class ChannelAxisTestData { protected: enum Mode { LOAD, SAVE }; @@ -1140,7 +1111,7 @@ class ChannelAxisTestData { void loadOrSave(const RunContext& run_ctx, const TBlob& blob, int channel_axis, const Mode mode) { test::CAccessAsCPU cpu_blob(run_ctx, blob, true); mxnet::op::batchnorm::BNTensor3 tensor3(cpu_blob(), channel_axis); - const mxnet::TShape &shape = blob.shape_; + const mxnet::TShape& shape = blob.shape_; CHECK_GT(shape.ndim(), 0); if (channel_axis < 0) { channel_axis = shape.ndim() + channel_axis; @@ -1149,8 +1120,8 @@ class ChannelAxisTestData { const size_t channel_count = shape[channel_axis]; std::vector indexes(channel_count, 0); for (size_t outer = 0, outerCount = tensor3.OuterSize(); outer < outerCount; ++outer) { - for (size_t channel = 0, channelCount = tensor3.ChannelCount(); - channel < channelCount; ++channel) { + for (size_t channel = 0, channelCount = tensor3.ChannelCount(); channel < channelCount; + ++channel) { CHECK_LT(channel, channel_data_.size()); for (size_t inner = 0, innerCount = tensor3.InnerSize(); inner < innerCount; ++inner) { CHECK_LT(indexes[channel], channel_data_[channel].size()); @@ -1165,7 +1136,7 @@ class ChannelAxisTestData { } public: - std::vector> channel_data_; + std::vector> channel_data_; static void print(const std::string& label, const std::vector>& m) { if (test::debug_output) { @@ -1173,15 +1144,14 @@ class ChannelAxisTestData { std::cout << label << ": "; } for (size_t i = 0, n = m.size(); i < n; ++i) { - const std::vector &vec = m[i]; + const std::vector& vec = m[i]; for (size_t j = 0, jn = vec.size(); j < jn; ++j) { if (j) { std::cout << ", "; } const DType val = vec[j]; std::cout << std::fixed << std::setw(7) - << std::setprecision(mxnet::test::MPRINT_PRECISION) - << std::right << val; + << std::setprecision(mxnet::test::MPRINT_PRECISION) << std::right << val; } std::cout << std::endl; } @@ -1217,16 +1187,16 @@ class ChannelAxisTestData { } }; -template +template static void compare(const RunContext& run_ctx, const TBlob& blob, const std::vector& vals) { CHECK_EQ(blob.Size(), vals.size()); test::CAccessAsCPU cpu_blob(run_ctx, blob, false); - const DType *v = cpu_blob().dptr(); + const DType* v = cpu_blob().dptr(); for (size_t 
i = 0, n = vals.size(); i < n; ++i) { const DType vBlob = v[i]; const DType vVect = vals[i]; - const bool near = BatchNormValidator::isNear( - vBlob, vVect, BatchNormValidator::ErrorBound(&cpu_blob())); + const bool near = BatchNormValidator::isNear( + vBlob, vVect, BatchNormValidator::ErrorBound(&cpu_blob())); ASSERT_TRUE(near); if (!near) { LOG(WARNING) << vBlob << " is not near enough to " << vVect << std::endl; @@ -1235,19 +1205,19 @@ static void compare(const RunContext& run_ctx, const TBlob& blob, const std::vec } #ifndef _WIN32 -template +template static void compare(const std::vector>& d1, const std::vector>& d2) { CHECK_EQ(d1.size(), d2.size()); for (size_t x = 0, xn = d1.size(); x < xn; ++x) { - const std::vector &vec1 = d1[x]; - const std::vector &vec2 = d2[x]; + const std::vector& vec1 = d1[x]; + const std::vector& vec2 = d2[x]; CHECK_EQ(vec1.size(), vec2.size()); for (size_t i = 0, n = vec1.size(); i < n; ++i) { - const DType v1 = vec1[i]; - const DType v2 = vec2[i]; + const DType v1 = vec1[i]; + const DType v2 = vec2[i]; const bool near = BatchNormValidator::isNear( - v1, v2, BatchNormValidator::ERROR_BOUND()); + v1, v2, BatchNormValidator::ERROR_BOUND()); if (!near) { LOG(WARNING) << v1 << " is not near enough to " << v2 << std::endl; ASSERT_TRUE(near); @@ -1256,7 +1226,7 @@ static void compare(const std::vector>& d1, } } -template +template static void testSaveAndLoad(const std::vector& dims, const int channelAxis, const std::vector>& inputChannelData, @@ -1271,10 +1241,10 @@ static void testSaveAndLoad(const std::vector& dims, RunContext cpu_run_ctx; cpu_run_ctx.ctx.dev_type = Context::kCPU; - cpu_run_ctx.ctx.dev_id = 0; - cpu_run_ctx.stream = nullptr; - std::unique_ptr blob(new test::StandaloneBlob( - shape, false, mshadow::DataType::kFlag)); + cpu_run_ctx.ctx.dev_id = 0; + cpu_run_ctx.stream = nullptr; + std::unique_ptr blob( + new test::StandaloneBlob(shape, false, mshadow::DataType::kFlag)); data.save(cpu_run_ctx, *blob, channelAxis); ChannelAxisTestData::print(cpu_run_ctx, "saved to blob", *blob); @@ -1287,34 +1257,35 @@ static void testSaveAndLoad(const std::vector& dims, TEST(BATCH_NORM, TestChannelAxisSaveAndLoad) { std::cout << std::endl << std::flush; - using DType = float; + using DType = float; using AccReal = float; - const std::vector> myData = - { { 1.0f, 1.0f, 1.0f, 1.0f }, - { 2.0f, 2.0f, 2.0f, 2.0f }, - { 3.0f, 3.0f, 3.0f, 3.0f } }; - - testSaveAndLoad({ 1, 3, 2, 2 }, 1, myData, - { 1.0f, 1.0f, 1.0f, 1.0f, - 2.0f, 2.0f, 2.0f, 2.0f, - 3.0f, 3.0f, 3.0f, 3.0f}); - - testSaveAndLoad({ 1, 2, 2, 3 }, 3, myData, - { 1.0f, 2.0f, 3.0f, - 1.0f, 2.0f, 3.0f, - 1.0f, 2.0f, 3.0f, - 1.0f, 2.0f, 3.0f}); - - testSaveAndLoad({ 1, 2, 3, 2 }, 2, myData, - { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f, - 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f}); + const std::vector> myData = { + {1.0f, 1.0f, 1.0f, 1.0f}, {2.0f, 2.0f, 2.0f, 2.0f}, {3.0f, 3.0f, 3.0f, 3.0f}}; + + testSaveAndLoad( + {1, 3, 2, 2}, + 1, + myData, + {1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f, 3.0f}); + + testSaveAndLoad( + {1, 2, 2, 3}, + 3, + myData, + {1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f}); + + testSaveAndLoad( + {1, 2, 3, 2}, + 2, + myData, + {1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f, 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f}); } /*! 
\brief Insert the channel field `channelCount` into the shape at `channelAxis` position */ static mxnet::TShape MakeShape(const std::vector& shape, - signed int channelAxis, - const size_t channelCount) { + signed int channelAxis, + const size_t channelCount) { if (channelAxis < 0) { channelAxis += shape.size() + 1; } @@ -1333,19 +1304,18 @@ static mxnet::TShape MakeShape(const std::vector& shape, /*! \brief Create and arrange equivalent data with different channel axes, then compare * normalized results */ -static void runChannelAxisTest( - const bool isGPU1, - const bool isGPU2, - const test::op::kwargs_t& base_kwargs, - const std::vector shape, - const signed int channelAxis1, - const signed int channelAxis2, - const size_t channelCount, - const bool simpleData, - const size_t numberOfPasses = 5 +static void runChannelAxisTest(const bool isGPU1, + const bool isGPU2, + const test::op::kwargs_t& base_kwargs, + const std::vector shape, + const signed int channelAxis1, + const signed int channelAxis2, + const size_t channelCount, + const bool simpleData, + const size_t numberOfPasses = 5 ) { - using DType = float; + using DType = float; using AccReal = float; size_t spatialSize = 1; @@ -1392,17 +1362,23 @@ static void runChannelAxisTest( // Create operator 1 with ChannelAxis2 (normally the experimental one) kwargs.push_back({"axis", std::to_string(channelAxis1)}); test::op::OpInfo> info_c1 = - test::op::createOpAndInfoF>( - BNOperatorExecutor::ArgsWithOpName( - kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU1, shape_c1, kwargs); + test::op::createOpAndInfoF>( + BNOperatorExecutor::ArgsWithOpName( + kwargs, "BatchNorm", "_backward_BatchNorm"), + isGPU1, + shape_c1, + kwargs); kwargs.pop_back(); // Create operator 2 with ChannelAxis2 (normally the control one) kwargs.push_back({"axis", std::to_string(channelAxis2)}); test::op::OpInfo> info_c2 = - test::op::createOpAndInfoF>( - BNOperatorExecutor::ArgsWithOpName( - kwargs, "BatchNorm", "_backward_BatchNorm"), isGPU2, shape_c2, kwargs); + test::op::createOpAndInfoF>( + BNOperatorExecutor::ArgsWithOpName( + kwargs, "BatchNorm", "_backward_BatchNorm"), + isGPU2, + shape_c2, + kwargs); kwargs.pop_back(); // Init operators @@ -1413,28 +1389,32 @@ static void runChannelAxisTest( // Save input data to blob with new shape 1 data_c1.save(info_c1.executor_->ctx().run_ctx, - info_c1.executor_->GetBlob(ForwardInputs::kForInData), channelAxis1); + info_c1.executor_->GetBlob(ForwardInputs::kForInData), + channelAxis1); ChannelAxisTestData::print(info_c1.executor_->ctx().run_ctx, "blob 1 input", info_c1.executor_->GetBlob(ForwardInputs::kForInData)); // Save input data to blob with new shape 2 data_c2.save(info_c2.executor_->ctx().run_ctx, - info_c2.executor_->GetBlob(ForwardInputs::kForInData), channelAxis2); + info_c2.executor_->GetBlob(ForwardInputs::kForInData), + channelAxis2); ChannelAxisTestData::print(info_c2.executor_->ctx().run_ctx, "blob 2 input", info_c2.executor_->GetBlob(ForwardInputs::kForInData)); // Save output grad to blob with new shape 1 grad_c1.save(info_c1.executor_->ctx().run_ctx, - info_c1.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), channelAxis1); + info_c1.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), + channelAxis1); ChannelAxisTestData::print(info_c1.executor_->ctx().run_ctx, "blob 1 output grad", info_c1.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad)); // Save output grad to blob with new shape 2 grad_c2.save(info_c2.executor_->ctx().run_ctx, - 
info_c2.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), channelAxis2); + info_c2.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad), + channelAxis2); ChannelAxisTestData::print(info_c2.executor_->ctx().run_ctx, "blob 2 output grad", info_c2.executor_->GetBlob(BackwardInputs::bwd_out_grad_Grad)); @@ -1453,12 +1433,14 @@ static void runChannelAxisTest( // // Transform operator 1's blob output to a normalized shape data_c1.load(info_c1.executor_->ctx().run_ctx, - info_c1.executor_->GetBlob(ForwardOutputs::kForOutData), channelAxis1); + info_c1.executor_->GetBlob(ForwardOutputs::kForOutData), + channelAxis1); ChannelAxisTestData::print("channel data 1", data_c1.channel_data_); // Transform operator 2's blob output to a normalized shape data_c2.load(info_c2.executor_->ctx().run_ctx, - info_c2.executor_->GetBlob(ForwardOutputs::kForOutData), channelAxis2); + info_c2.executor_->GetBlob(ForwardOutputs::kForOutData), + channelAxis2); ChannelAxisTestData::print("channel data 2", data_c2.channel_data_); // Compare the operators' output data while they're in a normalized shape @@ -1469,12 +1451,14 @@ static void runChannelAxisTest( // // Transform operator 1's input-grad blob to a normalized shape grad_c1.load(info_c1.executor_->ctx().run_ctx, - info_c1.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), channelAxis1); + info_c1.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), + channelAxis1); ChannelAxisTestData::print("input grad 1", grad_c1.channel_data_); // Transform operator 2's input-grad blob to a normalized shape grad_c2.load(info_c2.executor_->ctx().run_ctx, - info_c2.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), channelAxis2); + info_c2.executor_->GetBlob(BackwardOutputs::bwd_in_grad_Data), + channelAxis2); ChannelAxisTestData::print("input grad 2", grad_c2.channel_data_); // Compare the operators' input grad data while they're in a normalized shape @@ -1483,13 +1467,14 @@ static void runChannelAxisTest( TEST(BATCH_NORM, TestChannelAxisSimple) { std::cout << std::endl << std::flush; - const size_t CHANNEL_COUNT = 4; - const int DEFAULT_AXIS = 1; - const int NEW_AXIS = -2; - const bool useSimpleData = true; // change to true sometimes for troubleshooting + const size_t CHANNEL_COUNT = 4; + const int DEFAULT_AXIS = 1; + const int NEW_AXIS = -2; + const bool useSimpleData = true; // change to true sometimes for troubleshooting const std::vector shape = {1, 2, 3}; // Check against base-case of channel axis position 1 - runChannelAxisTest(false, false, + runChannelAxisTest(false, + false, useglobalstats_kwargs_nocudnn, shape, DEFAULT_AXIS, @@ -1554,123 +1539,116 @@ TEST(BATCH_NORM, TestChannelAxis) { #if MXNET_USE_CUDA TEST(BATCH_NORM, Test2DForward2D_gpu) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - TestBatchNormOperatorForward>( - true, - {BATCH_SIZE, CHANNELS, DH, DW}, - blank_kwargs); - TestBatchNormOperatorForward>( - true, - {BATCH_SIZE, CHANNELS, DH, DW}, - blank_kwargs_nocudnn); - }); + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + TestBatchNormOperatorForward>( + true, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs); + TestBatchNormOperatorForward>( + true, {BATCH_SIZE, CHANNELS, DH, DW}, blank_kwargs_nocudnn); + }); } } TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({1, 1, 2, 1}); - testForwardAndBackward>( + for (int type : v2_types) { + 
MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({1, 1, 2, 1}); + testForwardAndBackward>( false, true, inputShape, blank_kwargs); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, blank_kwargs_nocudnn); - }); + }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); - testForwardAndBackward>( + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + testForwardAndBackward>( false, true, inputShape, blank_kwargs); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, blank_kwargs_nocudnn); - }); + }); } } // nonfixgamma_kwargs TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_nfg) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({1, 1, 2, 1}); - testForwardAndBackward>( + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({1, 1, 2, 1}); + testForwardAndBackward>( false, true, inputShape, nonfixgamma_kwargs); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, nonfixgamma_kwargs_nocudnn); - }); + }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); - testForwardAndBackward>( + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + testForwardAndBackward>( false, true, inputShape, nonfixgamma_kwargs); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, nonfixgamma_kwargs_nocudnn); - }); + }); } } // useglobalstats_kwargs TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_ugs) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({2, 3, 2, 2}); - testForwardAndBackward>( + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({2, 3, 2, 2}); + testForwardAndBackward>( false, true, inputShape, useglobalstats_kwargs_nocudnn); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, useglobalstats_kwargs); - }); + }); } } TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) { - for (int type : v2_types) { - MSHADOW_REAL_TYPE_SWITCH_EX( - type, DType, AccReal, - { - const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); - testForwardAndBackward>( + for (int type : v2_types) { + MSHADOW_REAL_TYPE_SWITCH_EX(type, DType, AccReal, { + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + testForwardAndBackward>( false, true, inputShape, useglobalstats_kwargs); - testForwardAndBackward>( + testForwardAndBackward>( false, true, inputShape, useglobalstats_kwargs_nocudnn); - }); + }); } } #endif // MXNET_USE_CUDA #endif - diff --git a/tests/cpp/operator/coreop_perf.cc b/tests/cpp/operator/coreop_perf.cc index 14ef625e6915..42ddd926d497 100644 --- a/tests/cpp/operator/coreop_perf.cc +++ b/tests/cpp/operator/coreop_perf.cc @@ -33,13 +33,13 @@ using namespace mxnet; using kwargs_t = test::op::kwargs_t; -template +template static void 
RunCoreOpBidirectional(const bool isGPU, const kwargs_t& op_kwargs, - const char *op_name, - const char *backward_op_name = "") { + const char* op_name, + const char* backward_op_name = "") { const mxnet::TShape shape({5, 5}); - test::op::CoreOpExecutor op(isGPU, { shape }); + test::op::CoreOpExecutor op(isGPU, {shape}); op.set_verbose(false); op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name)); @@ -56,38 +56,32 @@ static void RunCoreOpBidirectional(const bool isGPU, } } -template +template static void RunCoreOpTimingTest(const bool isGPU, const kwargs_t& op_kwargs, - const char *op_name, - const char *backward_op_name = "") { - const kwargs_t kwargs = test::op::CoreOpExecutor::ArgsWithOpName( - op_kwargs, op_name, backward_op_name); + const char* op_name, + const char* backward_op_name = "") { + const kwargs_t kwargs = + test::op::CoreOpExecutor::ArgsWithOpName(op_kwargs, op_name, backward_op_name); // prime code and cache before the performance runs test::op::CoreOperatorRunner runner; - runner.RunBidirectional(false, { {20, 3, 128, 128} }, kwargs, 1); + runner.RunBidirectional(false, {{20, 3, 128, 128}}, kwargs, 1); // Do the performance runs - std::vector shapes; + std::vector shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - const char *pu = isGPU ? "GPU" : "CPU"; - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs, - 2, 10, { shape }); + const char* pu = isGPU ? 
"GPU" : "CPU"; + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest( + std::string(op_name) + " Operator " + pu, isGPU, false, kwargs, 2, 10, {shape}); } } @@ -96,11 +90,13 @@ static void RunCoreOpTimingTest(const bool isGPU, */ TEST(COREOP_PERF, ExecuteBidirectional) { std::cout << "NEGATIVE CLIP GRADIENT" << std::endl; - RunCoreOpBidirectional(false, { {"lr", "0.01" }, { "clip_gradient", "-1" } }, + RunCoreOpBidirectional(false, + {{"lr", "0.01"}, {"clip_gradient", "-1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); std::cout << "POSITIVE CLIP GRADIENT" << std::endl; - RunCoreOpBidirectional(false, { {"lr", "0.01" }, { "clip_gradient", "1" } }, + RunCoreOpBidirectional(false, + {{"lr", "0.01"}, {"clip_gradient", "1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); } @@ -110,11 +106,13 @@ TEST(COREOP_PERF, ExecuteBidirectional) { */ TEST(COREOP_PERF, TimingCPU) { std::cout << "NEGATIVE CLIP GRADIENT" << std::endl; - RunCoreOpTimingTest(false, { {"lr", "0.01" }, { "clip_gradient", "-1" } }, + RunCoreOpTimingTest(false, + {{"lr", "0.01"}, {"clip_gradient", "-1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); std::cout << "POSITIVE CLIP GRADIENT" << std::endl; - RunCoreOpTimingTest(false, { {"lr", "0.01" }, { "clip_gradient", "1" } }, + RunCoreOpTimingTest(false, + {{"lr", "0.01"}, {"clip_gradient", "1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); } @@ -125,13 +123,14 @@ TEST(COREOP_PERF, TimingCPU) { */ TEST(COREOP_PERF, TimingGPU) { std::cout << "NEGATIVE CLIP GRADIENT" << std::endl; - RunCoreOpTimingTest(true, { {"lr", "0.01" }, { "clip_gradient", "-1" } }, + RunCoreOpTimingTest(true, + {{"lr", "0.01"}, {"clip_gradient", "-1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); std::cout << "POSITIVE CLIP GRADIENT" << std::endl; - RunCoreOpTimingTest(true, { {"lr", "0.01" }, { "clip_gradient", "1" } }, + RunCoreOpTimingTest(true, + {{"lr", "0.01"}, {"clip_gradient", "1"}}, "sgd_mom_update", COREOP_BWD_OP_NAME_VALUE_NONE); } #endif // MXNET_USE_CUDA == 1 - diff --git a/tests/cpp/operator/dnnl_operator_test.cc b/tests/cpp/operator/dnnl_operator_test.cc index 7e2233c9b449..e66fc56bab2c 100644 --- a/tests/cpp/operator/dnnl_operator_test.cc +++ b/tests/cpp/operator/dnnl_operator_test.cc @@ -559,8 +559,8 @@ void TestConcatOp(const OpAttrs& attrs, VerifyFunc verify_fn, bool backwards = f int dim = std::stoi(str_dim); if (dim >= in_arr.arr.shape().ndim()) continue; - float scale = backwards ? 1 / static_cast(attrs.num_outputs) - : static_cast(attrs.num_inputs); + float scale = backwards ? 1 / static_cast(attrs.num_outputs) : + static_cast(attrs.num_inputs); std::vector scale_vector(in_arr.arr.shape().ndim()); for (int i = 0; i < in_arr.arr.shape().ndim(); i++) diff --git a/tests/cpp/operator/dropout_perf.cc b/tests/cpp/operator/dropout_perf.cc index 2a1754e2606f..71aad4395caa 100644 --- a/tests/cpp/operator/dropout_perf.cc +++ b/tests/cpp/operator/dropout_perf.cc @@ -32,7 +32,7 @@ using namespace mxnet; typedef std::vector > kwargs_t; -const kwargs_t basic_dropout_args = { }; +const kwargs_t basic_dropout_args = {}; /*! 
 * \brief Generic bidirectional sanity test
@@ -42,10 +42,9 @@ TEST(DROPOUT_PERF, ExecuteBidirectional) {
   kwargs_t kwargs = basic_dropout_args;
   kwargs.push_back({"mode", "always"});
   test::op::CoreOperatorRunner<float> runner;
-  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
-                                                           "_backward_Dropout");
+  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
   runner.set_verbose(true);
-  runner.RunBidirectional(false, { shape }, kwargs, 1);
+  runner.RunBidirectional(false, {shape}, kwargs, 1);
 }

 /*!
@@ -53,32 +52,25 @@ TEST(DROPOUT_PERF, ExecuteBidirectional) {
  */
 TEST(DROPOUT_PERF, TimingCPU) {
   kwargs_t kwargs = basic_dropout_args;
-// Which math function is arbitrary since it will have roughly constant timing among approaches
+  // Which math function is arbitrary since it will have roughly constant timing among approaches
   kwargs.push_back({"mode", "always"});
   mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner<float> runner;
-  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
-                                                           "_backward_Dropout");
-  runner.RunBidirectional(false, { shape }, kwargs, 1);
-  std::vector<mxnet::TShape> shapes;
+  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
+  runner.RunBidirectional(false, {shape}, kwargs, 1);
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
-    shapes = {
-        {1, 1, 28, 28},
-        {1, 3, 28, 28},
-        {50, 1, 18, 32},
-        {50, 3, 18, 32},
-        {20, 3, 128, 128}
-    };
+    shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}};
   } else {
     shapes = {
-        {1, 1, 28, 28},
-        {50, 3, 18, 32},
+        {1, 1, 28, 28},
+        {50, 3, 18, 32},
     };
   }
-  for (const mxnet::TShape &shape : shapes) {
-    kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
-                                                             "_backward_Dropout");
-    runner.TimingTest("Dropout Operator CPU", false, false, kwargs, 2, 10, { shape }, false);
+  for (const mxnet::TShape& shape : shapes) {
+    kwargs =
+        test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
+    runner.TimingTest("Dropout Operator CPU", false, false, kwargs, 2, 10, {shape}, false);
   }
 }

@@ -92,20 +84,14 @@ TEST(DROPOUT_PERF, TimingGPU) {
   kwargs.push_back({"mode", "always"});
   mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner<float> runner;
-  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
-                                                           "_backward_Dropout");
-  runner.RunBidirectional(false, { shape }, kwargs, 1);
-  std::vector<mxnet::TShape> shapes = {
-      {1, 1, 28, 28},
-      {1, 3, 28, 28},
-      {50, 1, 18, 32},
-      {50, 3, 18, 32},
-      {20, 3, 128, 128}
-  };
-  for (const mxnet::TShape &shape : shapes) {
-    kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout",
-                                                             "_backward_Dropout");
-    runner.TimingTest("Dropout Operator GPU", true, false, kwargs, 2, 10, { shape }, false);
+  kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
+  runner.RunBidirectional(false, {shape}, kwargs, 1);
+  std::vector<mxnet::TShape> shapes = {
+      {1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}};
+  for (const mxnet::TShape& shape : shapes) {
+    kwargs =
+        test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
+    runner.TimingTest("Dropout Operator GPU", true, false, kwargs, 2, 10, {shape}, false);
   }
 }
 #endif  // MXNET_USE_CUDA == 1
diff --git a/tests/cpp/operator/fully_conn_perf.cc b/tests/cpp/operator/fully_conn_perf.cc
index 9fd70261dc93..b7bcde3f8c0e 100644
--- a/tests/cpp/operator/fully_conn_perf.cc
+++
b/tests/cpp/operator/fully_conn_perf.cc @@ -34,7 +34,7 @@ using namespace mxnet; typedef std::vector<std::pair<std::string, std::string> > kwargs_t; -const kwargs_t basic_fullyconn_args = { {"num_hidden", "250"}, {"no_bias", "true"} }; +const kwargs_t basic_fullyconn_args = {{"num_hidden", "250"}, {"no_bias", "true"}}; /*! * \brief Generic bidirectional sanity test */ @@ -44,9 +44,9 @@ TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) { kwargs_t kwargs = basic_fullyconn_args; test::op::CoreOperatorRunner<float> runner; runner.set_verbose(true); - kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected", - "_backward_FullyConnected"); - runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1); + kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName( + kwargs, "FullyConnected", "_backward_FullyConnected"); + runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); } /*! @@ -57,30 +57,23 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) { mxnet::TShape shape1({10, 10, 10, 10}); mxnet::TShape shape2({250, 1000}); test::op::CoreOperatorRunner<float> runner; - kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected", - "_backward_FullyConnected"); - runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1); - std::vector<mxnet::TShape> shapes; + kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName( + kwargs, "FullyConnected", "_backward_FullyConnected"); + runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } for (const mxnet::TShape& shape : shapes) { mxnet::TShape shape2({250, static_cast<nnvm::dim_t>(shape.ProdShape(1, shape.ndim()))}); - kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected", - "_backward_FullyConnected"); - runner.TimingTest("Fully connected CPU", false, false, kwargs, 2, 10, - { shape, shape2 }, false); + kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName( + kwargs, "FullyConnected", "_backward_FullyConnected"); + runner.TimingTest("Fully connected CPU", false, false, kwargs, 2, 10, {shape, shape2}, false); } } @@ -93,30 +86,23 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) { mxnet::TShape shape1({10, 10, 10, 10}); mxnet::TShape shape2({250, 1000}); test::op::CoreOperatorRunner<float> runner; - kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected", - "_backward_FullyConnected"); - runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1); - std::vector<mxnet::TShape> shapes; + kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName( + kwargs, "FullyConnected", "_backward_FullyConnected"); + runner.RunBidirectional(false, {shape1, shape2}, kwargs, 1); + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } for (const mxnet::TShape& shape : shapes) { mxnet::TShape shape2({250, static_cast<nnvm::dim_t>(shape.ProdShape(1, shape.ndim()))}); - kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName(kwargs, "FullyConnected", - "_backward_FullyConnected"); - runner.TimingTest("Fully connected GPU", true, false, kwargs, 2, 10, - { shape, shape2 },
false); + kwargs = test::op::CoreOpExecutor<float>::ArgsWithOpName( + kwargs, "FullyConnected", "_backward_FullyConnected"); + runner.TimingTest("Fully connected GPU", true, false, kwargs, 2, 10, {shape, shape2}, false); } } #endif // MXNET_USE_CUDA == 1 diff --git a/tests/cpp/operator/krprod_test.cc b/tests/cpp/operator/krprod_test.cc index 66ddddd771f8..df0812d59f32 100644 --- a/tests/cpp/operator/krprod_test.cc +++ b/tests/cpp/operator/krprod_test.cc @@ -36,16 +36,16 @@ using namespace mshadow; using namespace mshadow::expr; using DType = double; -#define EXPECT_DOUBLE_EQ_MATRIX(expected, actual) \ -{ \ - for (int i = 0; i < static_cast<int>(actual.size(0)); ++i) \ - for (int j = 0; j < static_cast<int>(actual.size(1)); ++j) \ - EXPECT_LE(std::abs(actual[i][j] - expected[i][j]), 1e-10); \ -} \ +#define EXPECT_DOUBLE_EQ_MATRIX(expected, actual) \ + { \ + for (int i = 0; i < static_cast<int>(actual.size(0)); ++i) \ + for (int j = 0; j < static_cast<int>(actual.size(1)); ++j) \ + EXPECT_LE(std::abs(actual[i][j] - expected[i][j]), 1e-10); \ + } TEST(row_wise_kronecker, OneInputMatrix) { // Input matrices of shape (2, 4) which is also the expected result - DType mat[8] {1, 2, 3, 4, 5, 6, 7, 8}; + DType mat[8]{1, 2, 3, 4, 5, 6, 7, 8}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -64,12 +64,12 @@ TEST(row_wise_kronecker, OneInputMatrix) { TEST(row_wise_kronecker, TwoInputMatrices) { // Input matrices of shape (2, 3) and (2, 4) - DType mat1[6] {1, 2, 3, 4, 5, 6}; - DType mat2[8] {1, 2, 3, 4, 5, 6, 7, 8}; + DType mat1[6]{1, 2, 3, 4, 5, 6}; + DType mat2[8]{1, 2, 3, 4, 5, 6, 7, 8}; // Expect result of shape (2, 12) - DType expected[24] {1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12, - 20, 24, 28, 32, 25, 30, 35, 40, 30, 36, 42, 48}; + DType expected[24]{1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12, + 20, 24, 28, 32, 25, 30, 35, 40, 30, 36, 42, 48}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -90,11 +90,11 @@ TEST(row_wise_kronecker, TwoInputMatrices) { TEST(row_wise_kronecker, TwoInputMatrices2) { // Input matrices of shape (2, 3) and (2, 1) - DType mat1[6] {1, 2, 3, 4, 5, 6}; - DType mat2[2] {1, 2}; + DType mat1[6]{1, 2, 3, 4, 5, 6}; + DType mat2[2]{1, 2}; // Expect result of shape (2, 3) - DType expected[6] {1, 2, 3, 8, 10, 12}; + DType expected[6]{1, 2, 3, 8, 10, 12}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -117,9 +117,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices) { std::default_random_engine generator; std::uniform_int_distribution<int> distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 2)), - in3(Shape2(3, 3)), kr12(Shape2(3, 8)), kr13(Shape2(3, 24)), - result(Shape2(3, 24)); + Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 2)), in3(Shape2(3, 3)), kr12(Shape2(3, 8)), + kr13(Shape2(3, 24)), result(Shape2(3, 24)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); @@ -127,8 +126,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices) { AllocSpace(&kr13); AllocSpace(&result); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -139,7 +138,7 @@ TEST(row_wise_kronecker, ThreeInputMatrices) { row_wise_kronecker(result, ts_arr); EXPECT_DOUBLE_EQ_MATRIX(kr13, result); - for (auto & in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&kr12); FreeSpace(&kr13); @@ -150,9 +149,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices2) { std::default_random_engine generator; std::uniform_int_distribution<int>
distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 1)), - in3(Shape2(3, 3)), kr12(Shape2(3, 4)), kr13(Shape2(3, 12)), - result(Shape2(3, 12)); + Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 1)), in3(Shape2(3, 3)), kr12(Shape2(3, 4)), + kr13(Shape2(3, 12)), result(Shape2(3, 12)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); @@ -160,8 +158,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices2) { AllocSpace(&kr13); AllocSpace(&result); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -172,7 +170,7 @@ TEST(row_wise_kronecker, ThreeInputMatrices2) { row_wise_kronecker(result, ts_arr); EXPECT_DOUBLE_EQ_MATRIX(kr13, result); - for (auto & in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&kr12); FreeSpace(&kr13); @@ -183,9 +181,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices3) { std::default_random_engine generator; std::uniform_int_distribution<int> distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(3, 1)), in2(Shape2(3, 4)), - in3(Shape2(3, 3)), kr12(Shape2(3, 4)), kr13(Shape2(3, 12)), - result(Shape2(3, 12)); + Tensor<cpu, 2, DType> in1(Shape2(3, 1)), in2(Shape2(3, 4)), in3(Shape2(3, 3)), kr12(Shape2(3, 4)), + kr13(Shape2(3, 12)), result(Shape2(3, 12)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); @@ -193,8 +190,8 @@ TEST(row_wise_kronecker, ThreeInputMatrices3) { AllocSpace(&kr13); AllocSpace(&result); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -205,7 +202,7 @@ TEST(row_wise_kronecker, ThreeInputMatrices3) { row_wise_kronecker(result, ts_arr); EXPECT_DOUBLE_EQ_MATRIX(kr13, result); - for (auto & in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&kr12); FreeSpace(&kr13); @@ -216,10 +213,9 @@ TEST(row_wise_kronecker, FourInputMatrices) { std::default_random_engine generator; std::uniform_int_distribution<int> distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(3, 47)), in2(Shape2(3, 1)), - in3(Shape2(3, 5)), in4(Shape2(3, 2173)), kr12(Shape2(3, 47)), - kr13(Shape2(3, 47 * 5)), kr14(Shape2(3, 47 * 5 * 2173)), - result(Shape2(3, 47 * 5 * 2173)); + Tensor<cpu, 2, DType> in1(Shape2(3, 47)), in2(Shape2(3, 1)), in3(Shape2(3, 5)), + in4(Shape2(3, 2173)), kr12(Shape2(3, 47)), kr13(Shape2(3, 47 * 5)), + kr14(Shape2(3, 47 * 5 * 2173)), result(Shape2(3, 47 * 5 * 2173)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); @@ -229,8 +225,8 @@ TEST(row_wise_kronecker, FourInputMatrices) { AllocSpace(&kr14); AllocSpace(&result); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3, in4}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3, in4}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -242,7 +238,7 @@ TEST(row_wise_kronecker, FourInputMatrices) { row_wise_kronecker(result, ts_arr); EXPECT_DOUBLE_EQ_MATRIX(kr14, result); - for (auto & in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&kr12); FreeSpace(&kr13); @@ -250,11 +246,10 @@ TEST(row_wise_kronecker, FourInputMatrices) { FreeSpace(&result); } - #if MXNET_USE_LAPACK == 1 TEST(khatri_rao, OneInputMatrix) { // Input matrices of shape (2, 4) which is also the expected result -
DType mat[8] {1, 2, 3, 4, 5, 6, 7, 8}; + DType mat[8]{1, 2, 3, 4, 5, 6, 7, 8}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -273,12 +268,12 @@ TEST(khatri_rao, OneInputMatrix) { TEST(khatri_rao, TwoInputMatrices) { // Input matrices of shape (3, 2) and (4, 2) - DType mat1[6] {1, 4, 2, 5, 3, 6}; - DType mat2[8] {1, 5, 2, 6, 3, 7, 4, 8}; + DType mat1[6]{1, 4, 2, 5, 3, 6}; + DType mat2[8]{1, 5, 2, 6, 3, 7, 4, 8}; // Expect result of shape (12, 2) - DType expected[24] {1, 20, 2, 24, 3, 28, 4, 32, 2, 25, 4, 30, - 6, 35, 8, 40, 3, 30, 6, 36, 9, 42, 12, 48}; + DType expected[24]{1, 20, 2, 24, 3, 28, 4, 32, 2, 25, 4, 30, + 6, 35, 8, 40, 3, 30, 6, 36, 9, 42, 12, 48}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -301,9 +296,8 @@ TEST(khatri_rao, ThreeInputMatrices) { std::default_random_engine generator; std::uniform_int_distribution<int> distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(4, 3)), in2(Shape2(2, 3)), - in3(Shape2(3, 3)), kr12(Shape2(8, 3)), kr13(Shape2(24, 3)), - result(Shape2(24, 3)); + Tensor<cpu, 2, DType> in1(Shape2(4, 3)), in2(Shape2(2, 3)), in3(Shape2(3, 3)), kr12(Shape2(8, 3)), + kr13(Shape2(24, 3)), result(Shape2(24, 3)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); @@ -311,8 +305,8 @@ TEST(khatri_rao, ThreeInputMatrices) { AllocSpace(&kr13); AllocSpace(&result); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -323,7 +317,7 @@ TEST(khatri_rao, ThreeInputMatrices) { khatri_rao(result, ts_arr); EXPECT_DOUBLE_EQ_MATRIX(kr13, result); - for (auto & in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&kr12); FreeSpace(&kr13); @@ -331,7 +325,7 @@ TEST(khatri_rao, ThreeInputMatrices) { } TEST(inv_khatri_rao, OneInputMatrixTransposed) { - DType mat[8] {1, 2, 3, 4, 5, 6, 7, 8}; + DType mat[8]{1, 2, 3, 4, 5, 6, 7, 8}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -354,8 +348,8 @@ TEST(inv_khatri_rao, OneInputMatrixTransposed) { TEST(inv_khatri_rao, TwoInputMatrices) { // Input matrices of shape (3, 2) and (4, 2) - DType mat1[6] {1, 4, 2, 5, 3, 6}; - DType mat2[8] {1, 5, 2, 6, 3, 7, 4, 8}; + DType mat1[6]{1, 4, 2, 5, 3, 6}; + DType mat2[8]{1, 5, 2, 6, 3, 7, 4, 8}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -382,8 +376,8 @@ TEST(inv_khatri_rao, TwoInputMatrices) { TEST(inv_khatri_rao, TwoInputMatricesTransposed) { // Transposed input matrices of shape (2, 3) and (2, 4) - DType mat1[6] {1, 2, 3, 4, 5, 6}; - DType mat2[8] {1, 2, 3, 4, 5, 6, 7, 8}; + DType mat1[6]{1, 2, 3, 4, 5, 6}; + DType mat2[8]{1, 2, 3, 4, 5, 6, 7, 8}; // Make input tensors std::vector<Tensor<cpu, 2, DType> > ts_arr; @@ -413,14 +407,13 @@ TEST(inv_khatri_rao, ThreeInputMatricesTranposed) { std::default_random_engine generator; std::uniform_int_distribution<int> distribution(1, 6); - Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 2)), - in3(Shape2(3, 3)); + Tensor<cpu, 2, DType> in1(Shape2(3, 4)), in2(Shape2(3, 2)), in3(Shape2(3, 3)); AllocSpace(&in1); AllocSpace(&in2); AllocSpace(&in3); - std::vector<Tensor<cpu, 2, DType> > ts_arr {in1, in2, in3}; - for (auto & in : ts_arr) { + std::vector<Tensor<cpu, 2, DType> > ts_arr{in1, in2, in3}; + for (auto& in : ts_arr) { for (int i = 0; i < static_cast<int>(in.size(0)); ++i) for (int j = 0; j < static_cast<int>(in.size(1)); ++j) in[i][j] = distribution(generator); @@ -440,7 +433,7 @@ TEST(inv_khatri_rao, ThreeInputMatricesTranposed) { actual_dot = implicit_dot(implicit_dot(inv_kr, kr_t.T()), inv_kr); EXPECT_DOUBLE_EQ_MATRIX(inv_kr, actual_dot); - for (auto
& in : ts_arr) + for (auto& in : ts_arr) FreeSpace(&in); FreeSpace(&inv_kr); FreeSpace(&kr_t); diff --git a/tests/cpp/operator/runner/core_op_runner_test.cc b/tests/cpp/operator/runner/core_op_runner_test.cc index 6e6cb91096fe..733d933c811d 100644 --- a/tests/cpp/operator/runner/core_op_runner_test.cc +++ b/tests/cpp/operator/runner/core_op_runner_test.cc @@ -39,19 +39,17 @@ using kwargs_t = test::op::kwargs_t; static const kwargs_t basic_args = {}; static const std::vector<std::pair<std::string, std::string>> test_unary_operators = { - { "relu", "" }, // Code can figure out what the backward op is for some - { "sigmoid", "" }, - { "sqrt", "" } -}; + {"relu", ""}, // Code can figure out what the backward op is for some + {"sigmoid", ""}, + {"sqrt", ""}}; static const std::vector<std::pair<std::string, std::string>> test_binary_operators = { - { "elemwise_add", "_backward_add" }, - { "elemwise_mul", "_backward_mul" } -}; + {"elemwise_add", "_backward_add"}, + {"elemwise_mul", "_backward_mul"}}; -template<typename TT> +template <typename TT> inline std::vector<TT> AsVect(const TT& t) { - return std::move(std::vector<TT>({ t })); + return std::move(std::vector<TT>({t})); } /*! @@ -62,8 +60,8 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalSimpleUnaryList) { kwargs_t kwargs = basic_args; for (const std::pair<std::string, std::string>& i : test_unary_operators) { - const char *op_name = i.first.c_str(); - const char *backward_op_name = i.second.c_str(); + const char* op_name = i.first.c_str(); + const char* backward_op_name = i.second.c_str(); test::op::CoreOpExecutor<DType> op(false, AsVect(shape)); op.set_verbose(false); @@ -87,8 +85,8 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalSimpleUnaryList) { */ TEST(CORE_OP_RUNNER, ExecuteBidirectionalList) { for (const std::pair<std::string, std::string>& i : test_binary_operators) { - const char *op_name = i.first.c_str(); - const char *backward_op_name = i.second.c_str(); + const char* op_name = i.first.c_str(); + const char* backward_op_name = i.second.c_str(); mxnet::TShape shape({5, 5}); kwargs_t kwargs = basic_args; @@ -114,12 +112,12 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalList) { * \brief Execute bidirectional dot product, which has different shaped inputs and outputs */ TEST(CORE_OP_RUNNER, ExecuteBidirectionalDotProduct) { - const char *op_name = "dot"; - const char *backward_op_name = "_backward_dot"; + const char* op_name = "dot"; + const char* backward_op_name = "_backward_dot"; kwargs_t kwargs = basic_args; - test::op::CoreOpExecutor<float> op(false, { mxnet::TShape({ 2, 3 }), mxnet::TShape({ 3, 2 }) }); + test::op::CoreOpExecutor<float> op(false, {mxnet::TShape({2, 3}), mxnet::TShape({3, 2})}); op.set_verbose(false); op.Init(op.ArgsWithOpName(kwargs, op_name, backward_op_name)); @@ -139,11 +137,14 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunnerSimpleUnary) { typedef float DType; mxnet::TShape shape({5, 5}); for (const std::pair<std::string, std::string>& i : test_unary_operators) { - const char *op_name = i.first.c_str(); - const char *backward_op_name = i.second.c_str(); + const char* op_name = i.first.c_str(); + const char* backward_op_name = i.second.c_str(); test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, { shape }, test::op::CoreOpExecutor<DType>::ArgsWithOpName( - basic_args, op_name, backward_op_name), 1); + runner.RunBidirectional( + false, + {shape}, + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name), + 1); } } @@ -151,11 +152,14 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunner) { using DType = float; mxnet::TShape shape({5, 5}); for (const std::pair<std::string, std::string>& i : test_binary_operators) { - const char *op_name = i.first.c_str(); - const char *backward_op_name = i.second.c_str(); + const char*
op_name = i.first.c_str(); + const char* backward_op_name = i.second.c_str(); test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, { shape }, test::op::CoreOpExecutor<DType>::ArgsWithOpName( - basic_args, op_name, backward_op_name), 1); + runner.RunBidirectional( + false, + {shape}, + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name), + 1); } } @@ -163,16 +167,15 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunner) { * \brief Test RunBidirectional dot product, which has different shaped inputs and outputs */ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunnerDotProduct) { - using DType = float; - const char *op_name = "dot"; - const char *backward_op_name = "_backward_dot"; + using DType = float; + const char* op_name = "dot"; + const char* backward_op_name = "_backward_dot"; test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, - { mxnet::TShape({ 2, 3 }), mxnet::TShape({ 3, 2 }) }, - test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, - op_name, - backward_op_name), - 1); + runner.RunBidirectional( + false, + {mxnet::TShape({2, 3}), mxnet::TShape({3, 2})}, + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name), + 1); } /*! @@ -181,64 +184,50 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunnerDotProduct) { TEST(CORE_OP_RUNNER, TimingCPUSimpleUnary) { using DType = float; - const char *op_name = "relu"; + const char* op_name = "relu"; const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name); test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, { mxnet::TShape({10, 10, 10, 10}) }, kwargs, 1); + runner.RunBidirectional(false, {mxnet::TShape({10, 10, 10, 10})}, kwargs, 1); - std::vector<mxnet::TShape> shapes; + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest(std::string(op_name) + "Operator CPU", - false, false, kwargs, 2, 10, { shape }); + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest(std::string(op_name) + "Operator CPU", false, false, kwargs, 2, 10, {shape}); } } TEST(CORE_OP_RUNNER, TimingCPUBinary) { using DType = float; - const char *op_name = "elemwise_add"; - const char *backward_op_name = "_backward_add"; + const char* op_name = "elemwise_add"; + const char* backward_op_name = "_backward_add"; - const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName( - basic_args, op_name, backward_op_name); + const kwargs_t kwargs = + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name); test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, { mxnet::TShape({10, 10, 10, 10}) }, kwargs, 1); + runner.RunBidirectional(false, {mxnet::TShape({10, 10, 10, 10})}, kwargs, 1); - std::vector<mxnet::TShape> shapes; + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest(std::string(op_name) + "Operator CPU", false, -
false, kwargs, 2, 10, { shape }); + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest(std::string(op_name) + "Operator CPU", false, false, kwargs, 2, 10, {shape}); } } @@ -248,94 +237,83 @@ TEST(CORE_OP_RUNNER, TimingCPUBinary) { TEST(CORE_OP_RUNNER, TimingCPUBinaryDotProduct) { using DType = float; - const char *op_name = "dot"; - const char *backward_op_name = "_backward_dot"; + const char* op_name = "dot"; + const char* backward_op_name = "_backward_dot"; - const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName( - basic_args, op_name, backward_op_name); + const kwargs_t kwargs = + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name); test::op::CoreOperatorRunner<DType> runner; - runner.RunBidirectional(false, { {2, 3}, {3, 2} }, kwargs, 1); // prime code and cache + runner.RunBidirectional(false, {{2, 3}, {3, 2}}, kwargs, 1); // prime code and cache - std::vector<mxnet::TShape> shapes; + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { {28, 28}, {18, 32}, {128, 24}, {128, 256} }; + shapes = {{28, 28}, {18, 32}, {128, 24}, {128, 256}}; } else { - shapes = { {28, 28}, {128, 24} }; + shapes = {{28, 28}, {128, 24}}; } mxnet::ShapeVector input_shapes(2); - for (const mxnet::TShape &shape : shapes) { + for (const mxnet::TShape& shape : shapes) { input_shapes[0] = shape; input_shapes[1] = mxnet::TShape({shape[1], shape[0]}); - runner.TimingTest(std::string(op_name) + " Operator CPU", false, - false, kwargs, 2, 10, input_shapes); + runner.TimingTest( + std::string(op_name) + " Operator CPU", false, false, kwargs, 2, 10, input_shapes); } } #if MXNET_USE_CUDA == 1 TEST(CORE_OP_RUNNER, TimingGPUSimpleUnary) { typedef float DType; - const char *op_name = "relu"; + const char* op_name = "relu"; const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name); test::op::CoreOperatorRunner<DType> runner; runner.RunBidirectional(false, - { mxnet::TShape({10, 10, 10, 10}) }, + {mxnet::TShape({10, 10, 10, 10})}, kwargs, 1); // prime code and cache - std::vector<mxnet::TShape> shapes; + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28, 28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10, { shape }); - }} + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10, {shape}); + } +} TEST(CORE_OP_RUNNER, TimingGPUBinary) { typedef float DType; - const char *op_name = "elemwise_add"; - const char *backward_op_name = "_backward_add"; + const char* op_name = "elemwise_add"; + const char* backward_op_name = "_backward_add"; - const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName( - basic_args, op_name, backward_op_name); + const kwargs_t kwargs = + test::op::CoreOpExecutor<DType>::ArgsWithOpName(basic_args, op_name, backward_op_name); test::op::CoreOperatorRunner<DType> runner; runner.RunBidirectional(true, - { mxnet::TShape({10, 10, 10, 10}) }, + {mxnet::TShape({10, 10, 10, 10})}, kwargs, 1); // prime code and cache - std::vector<mxnet::TShape> shapes; + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 1, 28, 28}, - {1, 3, 28, 28}, - {50, 1, 18, 32}, - {50, 3, 18, 32}, - {20, 3, 128, 128} - }; + shapes = {{1, 1, 28,
28}, {1, 3, 28, 28}, {50, 1, 18, 32}, {50, 3, 18, 32}, {20, 3, 128, 128}}; } else { shapes = { - {1, 1, 28, 28}, - {50, 3, 18, 32}, + {1, 1, 28, 28}, + {50, 3, 18, 32}, }; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10, { shape }); + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10, {shape}); } } diff --git a/tests/cpp/operator/slice_channel_perf.cc b/tests/cpp/operator/slice_channel_perf.cc index 638613ea1ec9..6a3e622eb5f4 100644 --- a/tests/cpp/operator/slice_channel_perf.cc +++ b/tests/cpp/operator/slice_channel_perf.cc @@ -31,8 +31,8 @@ using namespace mxnet; -typedef std::vector<std::pair<std::string, std::string> > kwargs_t; -const kwargs_t basic_activation_args = { }; +typedef std::vector<std::pair<std::string, std::string>> kwargs_t; +const kwargs_t basic_activation_args = {}; /*! * \brief Generic bidirectional sanity test */ @@ -42,7 +42,7 @@ TEST(SLICE_CHANNEL_PERF, ExecuteBidirectional) { kwargs_t kwargs = basic_activation_args; kwargs.push_back({"num_outputs", "160"}); test::op::LegacyOpRunner<mxnet::op::SliceChannelProp, float, float> runner; - runner.RunBidirectional(false, { shape }, kwargs, 1); + runner.RunBidirectional(false, {shape}, kwargs, 1); } /*! @@ -53,26 +53,16 @@ TEST(SLICE_CHANNEL_PERF, TimingCPU) { // Which math function is arbitrary since it will have roughly constant timing among approaches kwargs.push_back({"num_outputs", "160"}); test::op::LegacyOpRunner<mxnet::op::SliceChannelProp, float, float> runner; - runner.RunBidirectional(false, - { mxnet::TShape({1, 160, 200}) }, - kwargs, 1); // prime code and cache - std::vector<mxnet::TShape> shapes; + runner.RunBidirectional( + false, {mxnet::TShape({1, 160, 200})}, kwargs, 1); // prime code and cache + std::vector<mxnet::TShape> shapes; if (test::performance_run) { - shapes = { - {1, 160, 200}, - {10, 160, 200}, - {100, 160, 200}, - {10, 160, 500}, - {100, 160, 500} - }; + shapes = {{1, 160, 200}, {10, 160, 200}, {100, 160, 200}, {10, 160, 500}, {100, 160, 500}}; } else { - shapes = { - {1, 160, 200}, - {1, 160, 200} - }; + shapes = {{1, 160, 200}, {1, 160, 200}}; } - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest("SliceChannel Operator CPU", false, false, kwargs, 2, 10, { shape }); + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest("SliceChannel Operator CPU", false, false, kwargs, 2, 10, {shape}); } } @@ -84,21 +74,13 @@ TEST(SLICE_CHANNEL_PERF, TimingGPU) { kwargs_t kwargs = basic_activation_args; // Which math function is arbitrary since it will have roughly constant timing among approaches kwargs.push_back({"num_outputs", "160"}); - test::OperatorRunner<mxnet::op::SliceChannelProp, test::op::LegacyOperatorExecutor<float, float>> runner; - runner.RunBidirectional(true, - { mxnet::TShape({1, 160, 200}) }, - kwargs, 1); // prime code and cache - std::vector<mxnet::TShape> shapes = { - {1, 160, 200}, - {1, 160, 200}, - {1, 160, 200}, - {1, 160, 200}, - {1, 160, 200} - }; - for (const mxnet::TShape &shape : shapes) { - runner.TimingTest("SliceChannel Operator GPU", true, false, kwargs, 2, 10, { shape }); + test::OperatorRunner<mxnet::op::SliceChannelProp, test::op::LegacyOperatorExecutor<float, float>> + runner; + runner.RunBidirectional(true, {mxnet::TShape({1, 160, 200})}, kwargs, 1); // prime code and cache + std::vector<mxnet::TShape> shapes = { + {1, 160, 200}, {1, 160, 200}, {1, 160, 200}, {1, 160, 200}, {1, 160, 200}}; + for (const mxnet::TShape& shape : shapes) { + runner.TimingTest("SliceChannel Operator GPU", true, false, kwargs, 2, 10, {shape}); } } #endif // MXNET_USE_CUDA == 1 - diff --git a/tests/cpp/operator/tune/operator_tune_test.cc b/tests/cpp/operator/tune/operator_tune_test.cc index 00a062698b17..7b78b0a6cd2a 100644 ---
a/tests/cpp/operator/tune/operator_tune_test.cc +++ b/tests/cpp/operator/tune/operator_tune_test.cc @@ -33,7 +33,7 @@ using namespace mxnet; */ TEST(OMP_TUNING, ShowAllTunedOps) { const std::unordered_set<std::string>& op_names = - mxnet::op::OperatorTune<float>::TunedOperatorNames(); + mxnet::op::OperatorTune<float>::TunedOperatorNames(); for (auto iter = op_names.begin(), e_iter = op_names.end(); iter != e_iter; ++iter) { std::cout << *iter << std::endl; } @@ -45,21 +45,19 @@ static std::vector<mxnet::ShapeVector> tuning_shapes() { std::vector<mxnet::ShapeVector> shapes; if (test::performance_run || test::csv) { shapes = { - {{1, 1, 28, 28}}, - {{1, 3, 28, 28}}, - {{50, 1, 18, 32}}, - {{25, 3, 64, 64}}, - {{10, 3, 128, 128}}, - {{20, 3, 128, 128}}, - {{30, 3, 128, 128}}, - {{30, 3, 256, 128}}, + {{1, 1, 28, 28}}, + {{1, 3, 28, 28}}, + {{50, 1, 18, 32}}, + {{25, 3, 64, 64}}, + {{10, 3, 128, 128}}, + {{20, 3, 128, 128}}, + {{30, 3, 128, 128}}, + {{30, 3, 256, 128}}, }; } else { - shapes = { - // Non-performance dataset acts as a sanity test - {{1, 1, 28, 28}}, - {{50, 3, 18, 32}} - }; + shapes = {// Non-performance dataset acts as a sanity test + {{1, 1, 28, 28}}, + {{50, 3, 18, 32}}}; } return shapes; } @@ -68,8 +66,8 @@ static std::vector<mxnet::ShapeVector> tuning_shapes() { * \brief Generic bidirectional sanity test */ TEST(OMP_TUNING, ExecuteBidirectional) { - test::op::BasicRunCoreOpBidirectional(false, true, {}, {tuning_shapes()[0]}, - "elemwise_add", "_backward_add"); + test::op::BasicRunCoreOpBidirectional( + false, true, {}, {tuning_shapes()[0]}, "elemwise_add", "_backward_add"); } /* Some test results: @@ -93,26 +91,20 @@ TEST(OMP_TUNING, ExecuteBidirectional) { * \brief Rune a tuning evaluation * \tparam DType Data type for which to evaluate tuning */ -template<typename DType> +template <typename DType> static float EvaluateTune(const bool verbose = true) { std::vector<std::pair<std::string, std::string>> binary_operators; if (test::csv) { - binary_operators = { - {"elemwise_add", COREOP_BWD_OP_NAME_VALUE_NONE} - }; + binary_operators = {{"elemwise_add", COREOP_BWD_OP_NAME_VALUE_NONE}}; } else if (test::performance_run) { - binary_operators = { - {"relu", ""}, // Code can figure out what the backward op is for some - {"sigmoid", ""}, - {"sqrt", ""}, - {"elemwise_add", "_backward_add"}, - {"elemwise_mul", "_backward_mul"}, - {"elemwise_div", "_backward_div"} - }; + binary_operators = {{"relu", ""}, // Code can figure out what the backward op is for some + {"sigmoid", ""}, + {"sqrt", ""}, + {"elemwise_add", "_backward_add"}, + {"elemwise_mul", "_backward_mul"}, + {"elemwise_div", "_backward_div"}}; } else { - binary_operators = { - {"elemwise_add", "_backward_add"} - }; + binary_operators = {{"elemwise_add", "_backward_add"}}; } std::vector<float> rates; for (size_t i = 0, n = binary_operators.size(); i < n; ++i) { @@ -120,18 +112,15 @@ static float EvaluateTune(const bool verbose = true) { tuningTester.set_calls_per_iteration(10); tuningTester.set_total_iterations(5); std::cout << "******************************" << std::endl; - std::cout << "Operators: " << binary_operators[i].first - << ", " << binary_operators[i].second - << " for type: " << test::type_name<DType>() - << std::endl; + std::cout << "Operators: " << binary_operators[i].first << ", " << binary_operators[i].second + << " for type: " << test::type_name<DType>() << std::endl; std::cout << "******************************" << std::endl; // Do the performance runs std::vector<mxnet::ShapeVector> shapes = tuning_shapes(); - tuningTester.TestTunedOperator({}, verbose, shapes, - binary_operators[i].first.c_str(), - binary_operators[i].second.c_str()); + tuningTester.TestTunedOperator( + {}, verbose, shapes,
binary_operators[i].first.c_str(), binary_operators[i].second.c_str()); rates.push_back(tuningTester.CalculateSuccessRate()); } return std::accumulate(rates.begin(), rates.end(), 0.0f) / rates.size(); @@ -175,4 +164,3 @@ TEST(OMP_TUNING, EvaluateTuneTestInt64) { } #endif // MXNET_USE_OPERATOR_TUNING - diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc index 8cd7fd2e8569..ae33d9664ddd 100644 --- a/tests/cpp/storage/storage_test.cc +++ b/tests/cpp/storage/storage_test.cc @@ -20,7 +20,7 @@ * Copyright (c) 2017 by Contributors * \file storage_test.cc * \brief cpu/gpu storage tests -*/ + */ #include #include #include @@ -30,7 +30,7 @@ TEST(Storage, Basic_CPU) { constexpr size_t kSize = 1024; - auto&& storage = mxnet::Storage::Get(); + auto&& storage = mxnet::Storage::Get(); mxnet::Context context_cpu{}; auto&& handle = storage->Alloc(kSize, context_cpu); EXPECT_EQ(handle.ctx, context_cpu); @@ -48,7 +48,7 @@ TEST(Storage, Basic_CPU) { } TEST(Storage, CPU_MemAlign) { - #if MXNET_USE_ONEDNN == 1 +#if MXNET_USE_ONEDNN == 1 // DNNL requires special alignment. 64 is used by the DNNL library in // memory allocation. static constexpr size_t alignment_ = mxnet::kDNNLAlign; @@ -56,12 +56,12 @@ TEST(Storage, CPU_MemAlign) { static constexpr size_t alignment_ = 16; #endif - auto&& storage = mxnet::Storage::Get(); + auto&& storage = mxnet::Storage::Get(); mxnet::Context context_cpu = mxnet::Context::CPU(0); for (int i = 0; i < 5; ++i) { const size_t kSize = (std::rand() % 1024) + 1; - auto&& handle = storage->Alloc(kSize, context_cpu); + auto&& handle = storage->Alloc(kSize, context_cpu); EXPECT_EQ(handle.ctx, context_cpu); EXPECT_EQ(handle.size, kSize); EXPECT_EQ(reinterpret_cast(handle.dptr) % alignment_, 0); @@ -69,22 +69,21 @@ TEST(Storage, CPU_MemAlign) { } } - #if MXNET_USE_CUDA TEST(Storage_GPU, Basic_GPU) { if (mxnet::test::unitTestsWithCuda) { setenv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF", "20", 1); setenv("MXNET_GPU_MEM_POOL_TYPE", "Round", 1); - auto &&storage = mxnet::Storage::Get(); + auto&& storage = mxnet::Storage::Get(); mxnet::Context context_gpu = mxnet::Context::GPU(0); - auto &&handle = storage->Alloc(32, context_gpu); - auto &&handle2 = storage->Alloc(2097153, context_gpu); + auto&& handle = storage->Alloc(32, context_gpu); + auto&& handle2 = storage->Alloc(2097153, context_gpu); EXPECT_EQ(handle.ctx, context_gpu); EXPECT_EQ(handle.size, 32); EXPECT_EQ(handle2.ctx, context_gpu); EXPECT_EQ(handle2.size, 2097153); - auto ptr = handle.dptr; + auto ptr = handle.dptr; auto ptr2 = handle2.dptr; storage->Free(handle); storage->Free(handle2); @@ -109,10 +108,10 @@ TEST(Storage_GPU, Basic_GPU) { unsetenv("MXNET_GPU_MEM_POOL_TYPE"); } if (mxnet::test::unitTestsWithCuda) { - constexpr size_t kSize = 1024; + constexpr size_t kSize = 1024; mxnet::Context context_gpu = mxnet::Context::GPU(0); - auto &&storage = mxnet::Storage::Get(); - auto &&handle = storage->Alloc(kSize, context_gpu); + auto&& storage = mxnet::Storage::Get(); + auto&& handle = storage->Alloc(kSize, context_gpu); assert(handle.ctx == context_gpu); assert(handle.size == kSize); auto ptr = handle.dptr; @@ -129,4 +128,3 @@ TEST(Storage_GPU, Basic_GPU) { } } #endif // MXNET_USE_CUDA - diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc index 4f91a4f67c09..69029ca3824d 100644 --- a/tests/cpp/test_main.cc +++ b/tests/cpp/test_main.cc @@ -22,7 +22,7 @@ * \file test_main.cc * \brief operator unit test utility functions * \author Chris Olivier -*/ + */ #include #include "mxnet/base.h" @@ -30,7 +30,8 
@@ #include static bool dumpCallback(const google_breakpad::MinidumpDescriptor& descriptor, - void* context, bool succeeded) { + void* context, + bool succeeded) { printf("Dump path: %s\n", descriptor.path()); return succeeded; } @@ -44,9 +45,9 @@ bool debug_output = false; #else bool debug_output = false; #endif -bool quick_test = false; -bool performance_run = false; -bool csv = false; +bool quick_test = false; +bool performance_run = false; +bool csv = false; bool thread_safety_force_cpu = false; } // namespace test } // namespace mxnet @@ -60,8 +61,8 @@ static bool checkForWorkingCuda() { for (int device = 0; device < device_count; ++device) { cudaDeviceProp prop; if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) { - std::cout << "Found CUDA Device #: " << device << " properties: " << prop.major - << "." << prop.minor << std::endl; + std::cout << "Found CUDA Device #: " << device << " properties: " << prop.major << "." + << prop.minor << std::endl; workingCuda = true; } } @@ -80,7 +81,7 @@ void backtrace_test() { CHECK(false) << "backtrace()"; } -int main(int argc, char ** argv) { +int main(int argc, char** argv) { #ifdef USE_BREAKPAD google_breakpad::MinidumpDescriptor descriptor("/tmp"); google_breakpad::ExceptionHandler eh(descriptor, NULL, dumpCallback, NULL, true, -1); @@ -92,7 +93,7 @@ int main(int argc, char ** argv) { mxnet::test::unitTestsWithCuda = checkForWorkingCuda(); // auto-determine for (int x = 1; x < argc; ++x) { - const char *arg = argv[x]; + const char* arg = argv[x]; // force checks with CUDA if (!strcmp(arg, "--with-cuda")) { // override (ie force attempt CUDA) @@ -108,8 +109,8 @@ int main(int argc, char ** argv) { } else if (!strcmp(arg, "--thread-safety-with-cpu")) { mxnet::test::thread_safety_force_cpu = true; } else if (!strcmp(arg, "--backtrace")) { - backtrace_test(); - return 0; + backtrace_test(); + return 0; } } From 6ac1280ad0c74e9f78d76c79253205393f267fc2 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:01:11 +0100 Subject: [PATCH 03/10] [INCLUDE] Re-format .cc .h files --- include/mxnet/base.h | 100 +- include/mxnet/c_api.h | 1340 ++++++++++++------------ include/mxnet/c_api_error.h | 42 +- include/mxnet/c_api_test.h | 21 +- include/mxnet/engine.h | 53 +- include/mxnet/executor.h | 94 +- include/mxnet/expr_operator.h | 11 +- include/mxnet/imperative.h | 112 +- include/mxnet/io.h | 46 +- include/mxnet/ir/expr.h | 2 +- include/mxnet/kvstore.h | 50 +- include/mxnet/lib_api.h | 1286 ++++++++++++++--------- include/mxnet/libinfo.h | 9 +- include/mxnet/node/container.h | 66 +- include/mxnet/node/node.h | 10 +- include/mxnet/op_attr_types.h | 112 +- include/mxnet/operator.h | 121 +-- include/mxnet/operator_util.h | 105 +- include/mxnet/random_generator.h | 77 +- include/mxnet/resource.h | 74 +- include/mxnet/rtc.h | 18 +- include/mxnet/runtime/c_runtime_api.h | 27 +- include/mxnet/runtime/container.h | 43 +- include/mxnet/runtime/container_ext.h | 289 +++-- include/mxnet/runtime/data_type.h | 22 +- include/mxnet/runtime/ffi_helper.h | 40 +- include/mxnet/runtime/memory.h | 52 +- include/mxnet/runtime/ndarray.h | 2 +- include/mxnet/runtime/ndarray_handle.h | 4 +- include/mxnet/runtime/object.h | 193 ++-- include/mxnet/runtime/packed_func.h | 345 +++--- include/mxnet/runtime/py_arg.h | 3 +- include/mxnet/runtime/registry.h | 47 +- include/mxnet/storage.h | 20 +- include/mxnet/tensor_blob.h | 239 +++-- include/mxnet/tuple.h | 241 +++-- 36 files changed, 2827 insertions(+), 2489 deletions(-) mode change 100755 => 100644 
include/mxnet/tensor_blob.h diff --git a/include/mxnet/base.h b/include/mxnet/base.h index dc428da8e484..0934250fec80 100644 --- a/include/mxnet/base.h +++ b/include/mxnet/base.h @@ -36,7 +36,6 @@ #include "libinfo.h" #include "tuple.h" - /*! * \brief define dllexport for Visual Studio */ @@ -64,7 +63,7 @@ /*! \brief patch version */ #define MXNET_PATCH 0 /*! \brief mxnet version */ -#define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) +#define MXNET_VERSION (MXNET_MAJOR * 10000 + MXNET_MINOR * 100 + MXNET_PATCH) /*! \brief helper for making version number */ #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch) /*! @@ -91,8 +90,8 @@ using Op = nnvm::Op; struct Context { /*! \brief Type of device */ enum DeviceType { - kCPU = cpu::kDevMask, - kGPU = gpu::kDevMask, + kCPU = cpu::kDevMask, + kGPU = gpu::kDevMask, kCPUPinned = 3, kCPUShared = 5, }; @@ -107,14 +106,16 @@ struct Context { * \return cpu::kDevMask or gpu::kDevMask */ inline DeviceType dev_mask() const { - if (dev_type == kCPUPinned || dev_type == kCPUShared) return kCPU; + if (dev_type == kCPUPinned || dev_type == kCPUShared) + return kCPU; return dev_type; } /*! * \brief Returns dev_id for kGPU and kCPUPinned, 0 otherwise */ inline int real_dev_id() const { - if (dev_type == kCPUPinned || dev_type == kGPU) return dev_id; + if (dev_type == kCPUPinned || dev_type == kGPU) + return dev_id; return 0; } /*! @@ -122,13 +123,13 @@ struct Context { * \param b another context to compare * \return compared result */ - inline bool operator<(const Context &b) const; + inline bool operator<(const Context& b) const; /*! * \brief check if current context equals another one * \param b another context to compare * \return whether dev mask and id are same */ - inline bool operator==(const Context &b) const { + inline bool operator==(const Context& b) const { return dev_type == b.dev_type && dev_id == b.dev_id; } /*! @@ -136,14 +137,14 @@ struct Context { * \param b another context to compare * \return whether they are not the same */ - inline bool operator!=(const Context &b) const { + inline bool operator!=(const Context& b) const { return !(*this == b); } /*! * \brief save the content into binary stream * \param strm the output stream */ - inline void Save(dmlc::Stream *strm) const { + inline void Save(dmlc::Stream* strm) const { strm->Write(&dev_type, sizeof(dev_type)); strm->Write(&dev_id, sizeof(dev_id)); } @@ -152,9 +153,11 @@ struct Context { * \param strm the output stream * \return whether the load is successful */ - inline bool Load(dmlc::Stream *strm) { - if (strm->Read(&dev_type, sizeof(dev_type)) != sizeof(dev_type)) return false; - if (strm->Read(&dev_id, sizeof(int32_t)) != sizeof(int32_t)) return false; + inline bool Load(dmlc::Stream* strm) { + if (strm->Read(&dev_type, sizeof(dev_type)) != sizeof(dev_type)) + return false; + if (strm->Read(&dev_id, sizeof(int32_t)) != sizeof(int32_t)) + return false; return true; } /*! \brief the maximal device type */ @@ -197,7 +200,7 @@ struct Context { * \param total_mem pointer to the uint64_t holding total GPU memory * \return No return value */ - inline static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total); + inline static void GetGPUMemoryInformation(int dev, uint64_t* free, uint64_t* total); /*! * Create a pinned CPU context. * \param dev_id the device id for corresponding GPU. 
@@ -219,10 +222,10 @@ struct Context { private: #if MXNET_USE_CUDA - static void CudaLibChecks(); + static void CudaLibChecks(); #endif #if MXNET_USE_CUDNN - static void CuDNNLibChecks(); + static void CuDNNLibChecks(); #endif }; @@ -234,19 +237,18 @@ class GPUAuxStream { * \brief constructor. * \param primary_stream gpu stream that is synced with the created auxiliary stream. */ - explicit GPUAuxStream(mshadow::Stream<gpu> *primary_stream) : - primary_stream_(primary_stream), - aux_stream_(primary_stream), - gpu_stream_sync_event_(nullptr) { + explicit GPUAuxStream(mshadow::Stream<gpu>* primary_stream) + : primary_stream_(primary_stream), + aux_stream_(primary_stream), + gpu_stream_sync_event_(nullptr) { if (Context::GetGPUStreamsPerWorker() >= 2) { // Create auxiliary stream on the same device with the same properties as the primary stream bool primary_has_blas_handle = primary_stream->blas_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle; bool primary_has_dnn_handle = primary_stream->dnn_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle; - aux_stream_ = mshadow::NewStream<gpu>(primary_has_blas_handle, - primary_has_dnn_handle, - primary_stream->dev_id); + aux_stream_ = mshadow::NewStream<gpu>( + primary_has_blas_handle, primary_has_dnn_handle, primary_stream->dev_id); MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming)); } } @@ -275,21 +277,23 @@ class GPUAuxStream { StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_); } /*! \brief Getter for created auxiliary stream. */ - mshadow::Stream<gpu> *GetStream() { return aux_stream_; } + mshadow::Stream<gpu>* GetStream() { + return aux_stream_; + } /*! * \brief Make future work enqueued to `s2` wait on completion of current work enqueued to `s1`. * \param s1 stream with work that must be completed before future s2 work can begin. * \param s2 stream whose future work is made to wait on the completion of existing s1 work. * \param event used to pass s1 state to s2. */ - static void StreamSync(mshadow::Stream<gpu> *s1, mshadow::Stream<gpu> *s2, cudaEvent_t event) { + static void StreamSync(mshadow::Stream<gpu>* s1, mshadow::Stream<gpu>* s2, cudaEvent_t event) { MSHADOW_CUDA_CALL(cudaEventRecord(event, s1->stream_)); MSHADOW_CUDA_CALL(cudaStreamWaitEvent(s2->stream_, event, 0)); } private: - mshadow::Stream<gpu> *primary_stream_; - mshadow::Stream<gpu> *aux_stream_; + mshadow::Stream<gpu>* primary_stream_; + mshadow::Stream<gpu>* aux_stream_; cudaEvent_t gpu_stream_sync_event_; }; @@ -307,7 +311,7 @@ class SyncedGPUAuxStream { * \brief constructor. * \param gpu_aux_stream auxilary gpu stream that is managed by this RAII object. */ - explicit SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream) : gpu_aux_stream_(gpu_aux_stream) { + explicit SyncedGPUAuxStream(GPUAuxStream* gpu_aux_stream) : gpu_aux_stream_(gpu_aux_stream) { gpu_aux_stream_->PreAuxStreamUseSync(); } /*! \brief destructor */ @@ -328,7 +332,7 @@ class SyncedGPUAuxStream { } private: - GPUAuxStream *gpu_aux_stream_; + GPUAuxStream* gpu_aux_stream_; }; #endif // MXNET_USE_CUDA @@ -342,11 +346,11 @@ struct RunContext { /*! * \brief the stream of the device, can be nullptr or Stream* in GPU mode */ - void *stream; + void* stream; /*! * \brief the auxiliary stream of the device, can be nullptr or Stream* in GPU mode */ - void *aux_stream; + void* aux_stream; /*!
* \brief pointer to the cuda event pool used by the dependency engine */ @@ -356,7 +360,7 @@ struct RunContext { * \return the mshadow stream * \tparam xpu the device type of the stream */ - template<typename xpu> + template <typename xpu> inline mshadow::Stream<xpu>* get_stream() const { return static_cast<mshadow::Stream<xpu>*>(stream); } @@ -379,7 +383,7 @@ struct RunContext { //! \cond Doxygen_Suppress namespace mxnet { // implementing Context -inline bool Context::operator<(const Context &b) const { +inline bool Context::operator<(const Context& b) const { if (dev_type == b.dev_type) { return dev_id < b.dev_id; } else { @@ -389,7 +393,7 @@ inline bool Context::operator<(const Context &b) const { inline Context Context::Create(DeviceType dev_type, int32_t dev_id) { Context ctx; ctx.dev_type = dev_type; - ctx.dev_id = dev_id < 0 ? 0 : dev_id; + ctx.dev_id = dev_id < 0 ? 0 : dev_id; if (dev_type & kGPU) { #if MXNET_USE_CUDA CudaLibChecks(); @@ -461,8 +465,7 @@ inline int32_t Context::GetGPUStreamsPerWorker() { return num_streams; } -inline void Context::GetGPUMemoryInformation(int dev, uint64_t *free_mem, - uint64_t *total_mem) { +inline void Context::GetGPUMemoryInformation(int dev, uint64_t* free_mem, uint64_t* total_mem) { #if MXNET_USE_CUDA size_t memF, memT; @@ -481,12 +484,11 @@ inline void Context::GetGPUMemoryInformation(int dev, uint64_t *free_mem, e = cudaSetDevice(curDevice); CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e); - *free_mem = static_cast<uint64_t>(memF); + *free_mem = static_cast<uint64_t>(memF); *total_mem = static_cast<uint64_t>(memT); #else - LOG(FATAL) - << "This call is only supported for MXNet built with CUDA support."; + LOG(FATAL) << "This call is only supported for MXNet built with CUDA support."; #endif } @@ -496,10 +498,10 @@ inline Context Context::FromString(const std::string& str) { const std::string::size_type l = str.find('('); CHECK_NE(l, std::string::npos); const std::string::size_type r = str.find(')'); - CHECK_EQ(r, str.length()-1); + CHECK_EQ(r, str.length() - 1); const std::string type = str.substr(0, l); - int id = std::stoi(str.substr(l+1, r-l-1)); + int id = std::stoi(str.substr(l + 1, r - l - 1)); if (type == "cpu") { ret = CPU(id); } else if (type == "gpu") { @@ -517,7 +519,7 @@ inline Context Context::FromString(const std::string& str) { return ret; } -inline std::ostream& operator<<(std::ostream &out, const Context &ctx) { +inline std::ostream& operator<<(std::ostream& out, const Context& ctx) { if (ctx.dev_type == Context::kCPU) { out << "cpu("; } else if (ctx.dev_type == Context::kGPU) { @@ -535,10 +537,9 @@ inline std::ostream& operator<<(std::ostream &out, const Context &ctx) { // describe op registration point #define STRINGIZE_DETAIL(x) #x -#define STRINGIZE(x) STRINGIZE_DETAIL(x) +#define STRINGIZE(x) STRINGIZE_DETAIL(x) #define MXNET_DESCRIBE(...)
describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__)) -#define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__) - +#define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__) #if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1 constexpr size_t kDNNLAlign = 64; @@ -547,17 +548,18 @@ constexpr size_t kDNNLAlign = 64; } // namespace mxnet namespace std { -template<> struct hash<mxnet::Context> { +template <> +struct hash<mxnet::Context> { size_t operator()(const mxnet::Context& ctx) const { size_t res = 0; - res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_type)); - res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_id)); + res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_type)); + res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_id)); return res; } }; #if __cplusplus < 201402L && !defined(_MSC_VER) -template<typename T, typename... Args> +template <typename T, typename... Args> inline std::unique_ptr<T> make_unique(Args&&... args) { return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); } diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 0aff74772c47..7611236e50e7 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -63,51 +63,51 @@ typedef int64_t dim_t; // will be casted internally to specific pointers types // these typedefs are mainly used for readablity reasons /*! \brief handle to NDArray */ -typedef void *NDArrayHandle; +typedef void* NDArrayHandle; /*! \brief handle to a mxnet narray function that changes NDArray */ -typedef const void *FunctionHandle; +typedef const void* FunctionHandle; /*! \brief handle to a function that takes param and creates symbol */ -typedef void *AtomicSymbolCreator; +typedef void* AtomicSymbolCreator; /*! \brief handle to cached operator */ -typedef void *CachedOpHandle; +typedef void* CachedOpHandle; /*! \brief handle to a symbol that can be bind as operator */ -typedef void *SymbolHandle; +typedef void* SymbolHandle; /*! \brief handle to a AtomicSymbol */ -typedef void *AtomicSymbolHandle; +typedef void* AtomicSymbolHandle; /*! \brief handle to an Executor */ -typedef void *ExecutorHandle; +typedef void* ExecutorHandle; /*! \brief handle a dataiter creator */ -typedef void *DataIterCreator; +typedef void* DataIterCreator; /*! \brief handle to a DataIterator */ -typedef void *DataIterHandle; +typedef void* DataIterHandle; /*! \brief handle a dataset creator */ -typedef void *DatasetCreator; +typedef void* DatasetCreator; /*! \brief handle to a Dataset */ -typedef void *DatasetHandle; +typedef void* DatasetHandle; /*! \brief handle to a BatchifyFunction creator*/ -typedef void *BatchifyFunctionCreator; +typedef void* BatchifyFunctionCreator; /*! \brief handle to a BatchifyFunction */ -typedef void *BatchifyFunctionHandle; +typedef void* BatchifyFunctionHandle; /*! \brief handle to KVStore */ -typedef void *KVStoreHandle; +typedef void* KVStoreHandle; /*! \brief handle to RecordIO */ -typedef void *RecordIOHandle; +typedef void* RecordIOHandle; /*! \brief handle to MXRtc*/ -typedef void *RtcHandle; +typedef void* RtcHandle; /*! \brief handle to rtc cuda module*/ -typedef void *CudaModuleHandle; +typedef void* CudaModuleHandle; /*! \brief handle to rtc cuda kernel*/ -typedef void *CudaKernelHandle; +typedef void* CudaKernelHandle; /*! \brief handle to a Profile object (domain, duration, counter, etc.) */ -typedef void *ProfileHandle; +typedef void* ProfileHandle; /*! \brief handle to DLManagedTensor*/ -typedef void *DLManagedTensorHandle; +typedef void* DLManagedTensorHandle; /*! \brief handle to Context */ -typedef const void *ContextHandle; +typedef const void* ContextHandle; /*!
\brief handle to Engine FnProperty */ -typedef const void *EngineFnPropertyHandle; +typedef const void* EngineFnPropertyHandle; /*! \brief handle to Engine VarHandle */ -typedef void *EngineVarHandle; +typedef void* EngineVarHandle; /*! \brief Engine asynchronous operation */ typedef void (*EngineAsyncFunc)(void*, void*, void*, void*); @@ -116,10 +116,7 @@ typedef void (*EngineSyncFunc)(void*, void*); /*! \brief Callback to free the param for EngineAsyncFunc/EngineSyncFunc */ typedef void (*EngineFuncParamDeleter)(void*); /*! \brief Monitor callback called at operator level for cached op */ -typedef void (*CachedOpMonitorCallback)(const char*, - const char*, - NDArrayHandle); - +typedef void (*CachedOpMonitorCallback)(const char*, const char*, NDArrayHandle); struct NativeOpInfo { void (*forward)(int, float**, int*, unsigned**, int*, void*); @@ -141,8 +138,7 @@ struct NDArrayOpInfo { bool (*infer_shape)(int, int*, unsigned**, void*); bool (*list_outputs)(char***, void*); bool (*list_arguments)(char***, void*); - bool (*declare_backward_dependency)(const int*, const int*, const int*, - int*, int**, void*); + bool (*declare_backward_dependency)(const int*, const int*, const int*, int*, int**, void*); // all functions also pass a payload void* pointer void* p_forward; void* p_backward; @@ -157,7 +153,7 @@ typedef int (*MXGenericCallback)(void); struct MXCallbackList { int num_callbacks; int (**callbacks)(void); - void **contexts; + void** contexts; }; struct LibFeature { @@ -165,11 +161,7 @@ struct LibFeature { bool enabled; }; -enum CustomOpCallbacks { - kCustomOpDelete, - kCustomOpForward, - kCustomOpBackward -}; +enum CustomOpCallbacks { kCustomOpDelete, kCustomOpForward, kCustomOpBackward }; enum CustomOpPropCallbacks { kCustomOpPropDelete, @@ -184,39 +176,50 @@ enum CustomOpPropCallbacks { kCustomOpPropBackwardInferStorageType }; - -typedef int (*CustomOpFBFunc)(int /*size*/, void** /*ptrs*/, int* /*tags*/, - const int* /*reqs*/, const int /*is_train*/, +typedef int (*CustomOpFBFunc)(int /*size*/, + void** /*ptrs*/, + int* /*tags*/, + const int* /*reqs*/, + const int /*is_train*/, void* /*state*/); typedef int (*CustomOpDelFunc)(void* /*state*/); typedef int (*CustomOpListFunc)(char*** /*args*/, void* /*state*/); -typedef int (*CustomOpInferShapeFunc)(int /*num_input*/, int* /*ndims*/, - int** /*shapes*/, void* /*state*/); +typedef int (*CustomOpInferShapeFunc)(int /*num_input*/, + int* /*ndims*/, + int** /*shapes*/, + void* /*state*/); typedef int (*CustomOpInferStorageTypeFunc)(int /*num_input*/, int* /*stypes*/, void* /*state*/); typedef int (*CustomOpBackwardInferStorageTypeFunc)(int /*num_input*/, - int * /*stypes*/, - int * /*tags*/, - void * /*state*/); + int* /*stypes*/, + int* /*tags*/, + void* /*state*/); typedef int (*CustomOpInferTypeFunc)(int /*num_input*/, int* /*types*/, void* /*state*/); -typedef int (*CustomOpBwdDepFunc)(const int* /*out_grad*/, const int* /*in_data*/, - const int* /*out_data*/, int* /*num_deps*/, - int** /*rdeps*/, void* /*state*/); -typedef int (*CustomOpCreateFunc)(const char* /*ctx*/, int /*num_inputs*/, - unsigned** /*shapes*/, const int* /*ndims*/, - const int* /*dtypes*/, struct MXCallbackList* /*ret*/, +typedef int (*CustomOpBwdDepFunc)(const int* /*out_grad*/, + const int* /*in_data*/, + const int* /*out_data*/, + int* /*num_deps*/, + int** /*rdeps*/, void* /*state*/); -typedef int (*CustomOpPropCreator)(const char* /*op_type*/, const int /*num_kwargs*/, - const char** /*keys*/, const char** /*values*/, +typedef int (*CustomOpCreateFunc)(const 
char* /*ctx*/, + int /*num_inputs*/, + unsigned** /*shapes*/, + const int* /*ndims*/, + const int* /*dtypes*/, + struct MXCallbackList* /*ret*/, + void* /*state*/); +typedef int (*CustomOpPropCreator)(const char* /*op_type*/, + const int /*num_kwargs*/, + const char** /*keys*/, + const char** /*values*/, struct MXCallbackList* /*ret*/); +enum CustomFunctionCallbacks { kCustomFunctionBackward, kCustomFunctionDelete }; -enum CustomFunctionCallbacks { - kCustomFunctionBackward, - kCustomFunctionDelete -}; - -typedef int (*CustomFunctionBwdFunc)(int /*num_ograds*/, int /*num_igrads*/, void** /*ptrs*/, - const int* /*reqs*/, const int /*is_train*/, +typedef int (*CustomFunctionBwdFunc)(int /*num_ograds*/, + int /*num_igrads*/, + void** /*ptrs*/, + const int* /*reqs*/, + const int /*is_train*/, void* /*state*/); typedef int (*CustomFunctionDelFunc)(void* /*state*/); @@ -229,7 +232,7 @@ typedef int (*CustomFunctionDelFunc)(void* /*state*/); * this function is threadsafe and can be called by different thread * \return error info */ -MXNET_DLL const char *MXGetLastError(); +MXNET_DLL const char* MXGetLastError(); //------------------------------------- // Part 0: Global State setups @@ -241,7 +244,7 @@ MXNET_DLL const char *MXGetLastError(); * \param 0 for quiet, 1 for verbose * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXLoadLib(const char *path, unsigned verbose, void** lib); +MXNET_DLL int MXLoadLib(const char* path, unsigned verbose, void** lib); /*! * \brief Get list of features supported on the runtime @@ -249,7 +252,7 @@ MXNET_DLL int MXLoadLib(const char *path, unsigned verbose, void** lib); * \param size of the array * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXLibInfoFeatures(const struct LibFeature **libFeature, size_t *size); +MXNET_DLL int MXLibInfoFeatures(const struct LibFeature** libFeature, size_t* size); /*! * \brief return whether the mxnet library is compiled with cxx11 abi @@ -299,7 +302,8 @@ MXNET_DLL int MXNotifyShutdown(); * \param kvstoreHandle handle to kvstore * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXSetProcessProfilerConfig(int num_params, const char* const* keys, +MXNET_DLL int MXSetProcessProfilerConfig(int num_params, + const char* const* keys, const char* const* vals, KVStoreHandle kvstoreHandle); @@ -323,7 +327,8 @@ MXNET_DLL int MXSetProfilerConfig(int num_params, const char* const* keys, const * \param kvstoreHandle handle to kvstore, needed for server process profiling * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXSetProcessProfilerState(int state, int profile_process, +MXNET_DLL int MXSetProcessProfilerState(int state, + int profile_process, KVStoreHandle kvStoreHandle); /*! @@ -353,7 +358,6 @@ MXNET_DLL int MXSetProfilerScope(const char* scope); */ MXNET_DLL int MXDumpProcessProfile(int finished, int profile_process, KVStoreHandle kvStoreHandle); - /*! * \brief Save profile and stop profiler for worker/current process * \param finished true if stat output should stop after this point @@ -372,8 +376,11 @@ MXNET_DLL int MXDumpProfile(int finished); * \return 0 when success, -1 when failure happens. * \note */ -MXNET_DLL int MXAggregateProfileStatsPrint(const char **out_str, int reset, int format, - int sort_by, int ascending); +MXNET_DLL int MXAggregateProfileStatsPrint(const char** out_str, + int reset, + int format, + int sort_by, + int ascending); /*! 
* \brief Pause profiler tuning collection @@ -399,7 +406,7 @@ MXNET_DLL int MXProfilePause(int paused); * \param out Return domain object * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXProfileCreateDomain(const char *domain, ProfileHandle *out); +MXNET_DLL int MXProfileCreateDomain(const char* domain, ProfileHandle* out); /*! * \brief Create profile task @@ -408,9 +415,7 @@ MXNET_DLL int MXProfileCreateDomain(const char *domain, ProfileHandle *out); * \param out Output handle * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXProfileCreateTask(ProfileHandle domain, - const char *task_name, - ProfileHandle *out); +MXNET_DLL int MXProfileCreateTask(ProfileHandle domain, const char* task_name, ProfileHandle* out); /*! * \brief Create profile frame @@ -420,8 +425,8 @@ MXNET_DLL int MXProfileCreateTask(ProfileHandle domain, * \return 0 when success, -1 when failure happens. */ MXNET_DLL int MXProfileCreateFrame(ProfileHandle domain, - const char *frame_name, - ProfileHandle *out); + const char* frame_name, + ProfileHandle* out); /*! * \brief Create profile event @@ -429,7 +434,7 @@ MXNET_DLL int MXProfileCreateFrame(ProfileHandle domain, * \param out Output handle * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXProfileCreateEvent(const char *event_name, ProfileHandle *out); +MXNET_DLL int MXProfileCreateEvent(const char* event_name, ProfileHandle* out); /*! * \brief Create profile counter @@ -439,8 +444,8 @@ MXNET_DLL int MXProfileCreateEvent(const char *event_name, ProfileHandle *out); * \return 0 when success, -1 when failure happens. */ MXNET_DLL int MXProfileCreateCounter(ProfileHandle domain, - const char *counter_name, - ProfileHandle *out); + const char* counter_name, + ProfileHandle* out); /*! * \brief Destroy a frame @@ -487,8 +492,8 @@ MXNET_DLL int MXProfileAdjustCounter(ProfileHandle counter_handle, int64_t value * \return 0 when success, -1 when failure happens. */ MXNET_DLL int MXProfileSetMarker(ProfileHandle domain, - const char *instant_marker_name, - const char *scope); + const char* instant_marker_name, + const char* scope); /*! * \brief Set the number of OMP threads to use @@ -519,7 +524,7 @@ MXNET_DLL int MXGetGPUCount(int* out); * \param total_mem pointer to the integer holding total GPU memory * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXGetGPUMemoryInformation(int dev, int *free_mem, int *total_mem); +MXNET_DLL int MXGetGPUMemoryInformation(int dev, int* free_mem, int* total_mem); /*! * \brief get the free and total available memory on a GPU @@ -528,14 +533,14 @@ MXNET_DLL int MXGetGPUMemoryInformation(int dev, int *free_mem, int *total_mem); * \param total_mem pointer to the uint64_t holding total GPU memory * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXGetGPUMemoryInformation64(int dev, uint64_t *free_mem, uint64_t *total_mem); +MXNET_DLL int MXGetGPUMemoryInformation64(int dev, uint64_t* free_mem, uint64_t* total_mem); /*! * \brief get the MXNet library version as an integer * \param pointer to the integer holding the version number * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXGetVersion(int *out); +MXNET_DLL int MXGetVersion(int* out); /*! 
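
The query functions above all follow the same out-parameter convention; a short sketch (device 0 is assumed to exist once MXGetGPUCount reports at least one GPU):

#include <mxnet/c_api.h>
#include <inttypes.h>
#include <stdio.h>

static void print_runtime_info(void) {
  int version = 0, ngpu = 0;
  if (MXGetVersion(&version) == 0)
    printf("libmxnet version %d\n", version);
  if (MXGetGPUCount(&ngpu) == 0 && ngpu > 0) {
    uint64_t free_mem = 0, total_mem = 0;
    if (MXGetGPUMemoryInformation64(0, &free_mem, &total_mem) == 0)  /* device 0 */
      printf("gpu0: %" PRIu64 " of %" PRIu64 " bytes free\n", free_mem, total_mem);
  }
}

/*!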
* \brief Load TVM operator from the binary library @@ -543,7 +548,7 @@ MXNET_DLL int MXGetVersion(int *out); * \return 0 when success, -1 when failure happens */ #if MXNET_USE_TVM_OP -MXNET_DLL int MXLoadTVMOp(const char *libpath); +MXNET_DLL int MXLoadTVMOp(const char* libpath); struct OtherOptionEntity { int val; @@ -572,7 +577,6 @@ typedef struct ConfigSpaces { MXNET_DLL int MXLoadTVMConfig(ConfigSpaces config); #endif // MXNET_USE_TVM_OP - //------------------------------------- // Part 1: NDArray creation and deletion //------------------------------------- @@ -583,7 +587,7 @@ MXNET_DLL int MXLoadTVMConfig(ConfigSpaces config); * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayCreateNone(NDArrayHandle *out); +MXNET_DLL int MXNDArrayCreateNone(NDArrayHandle* out); /*! * \brief create a NDArray with specified shape and data type @@ -599,13 +603,13 @@ MXNET_DLL int MXNDArrayCreateNone(NDArrayHandle *out); * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayCreate(const uint32_t *shape, +MXNET_DLL int MXNDArrayCreate(const uint32_t* shape, uint32_t ndim, int dev_type, int dev_id, int delay_alloc, int dtype, - NDArrayHandle *out); + NDArrayHandle* out); #define MXNDArrayCreateEx MXNDArrayCreate // backward compatibility for external deps /*! @@ -622,13 +626,13 @@ MXNET_DLL int MXNDArrayCreate(const uint32_t *shape, * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayCreate64(const int64_t *shape, +MXNET_DLL int MXNDArrayCreate64(const int64_t* shape, int ndim, int dev_type, int dev_id, int delay_alloc, int dtype, - NDArrayHandle *out); + NDArrayHandle* out); /*! * \brief create an empty sparse NDArray with specified shape and data type @@ -650,17 +654,17 @@ MXNET_DLL int MXNDArrayCreate64(const int64_t *shape, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayCreateSparseEx(int storage_type, - const uint32_t *shape, + const uint32_t* shape, uint32_t ndim, int dev_type, int dev_id, int delay_alloc, int dtype, uint32_t num_aux, - int *aux_type, - uint32_t *aux_ndims, - const uint32_t *aux_shape, - NDArrayHandle *out); + int* aux_type, + uint32_t* aux_ndims, + const uint32_t* aux_shape, + NDArrayHandle* out); /*! * \brief create an empty sparse NDArray with specified shape and data type @@ -682,17 +686,17 @@ MXNET_DLL int MXNDArrayCreateSparseEx(int storage_type, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayCreateSparseEx64(int storage_type, - const int64_t *shape, + const int64_t* shape, int ndim, int dev_type, int dev_id, int delay_alloc, int dtype, uint32_t num_aux, - int *aux_type, - int *aux_ndims, - const int64_t *aux_shape, - NDArrayHandle *out); + int* aux_type, + int* aux_ndims, + const int64_t* aux_shape, + NDArrayHandle* out); /*! * \brief create a NDArray handle that is loaded from raw bytes. @@ -701,9 +705,7 @@ MXNET_DLL int MXNDArrayCreateSparseEx64(int storage_type, * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayLoadFromRawBytes(const void *buf, - size_t size, - NDArrayHandle *out); +MXNET_DLL int MXNDArrayLoadFromRawBytes(const void* buf, size_t size, NDArrayHandle* out); /*! * \brief save the NDArray into raw bytes. * \param handle the NDArray handle @@ -711,9 +713,7 @@ MXNET_DLL int MXNDArrayLoadFromRawBytes(const void *buf, * \param out_buf the head of returning memory bytes. 
 * \return 0 when success, -1 when failure happens
 */
-MXNET_DLL int MXNDArraySaveRawBytes(NDArrayHandle handle,
-                                    size_t *out_size,
-                                    const char **out_buf);
+MXNET_DLL int MXNDArraySaveRawBytes(NDArrayHandle handle, size_t* out_size, const char** out_buf);
 /*!
  * \brief Save a list of NDArrays into the file.
  * \param fname name of the file.
@@ -748,9 +748,9 @@ MXNET_DLL int MXNDArraySave(const char* fname,
  * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXNDArrayLoad(const char* fname,
-                            uint32_t *out_size,
+                            uint32_t* out_size,
                             NDArrayHandle** out_arr,
-                            uint32_t *out_name_size,
+                            uint32_t* out_name_size,
                             const char*** out_names);
 
 /*!
@@ -767,11 +767,11 @@ MXNET_DLL int MXNDArrayLoad(const char* fname,
  * \param out_names the names of returning NDArrays, can be NULL
  * \return 0 when success, -1 when failure happens
  */
-MXNET_DLL int MXNDArrayLoadFromBuffer(const void *ndarray_buffer,
+MXNET_DLL int MXNDArrayLoadFromBuffer(const void* ndarray_buffer,
                                       size_t size,
-                                      uint32_t *out_size,
+                                      uint32_t* out_size,
                                       NDArrayHandle** out_arr,
-                                      uint32_t *out_name_size,
+                                      uint32_t* out_name_size,
                                       const char*** out_names);
 
 /*!
@@ -785,9 +785,7 @@ MXNET_DLL int MXNDArrayLoadFromBuffer(const void *ndarray_buffer,
  * \param data the data source to copy from.
  * \param size the memory size we want to copy from.
  */
-MXNET_DLL int MXNDArraySyncCopyFromCPU(NDArrayHandle handle,
-                                       const void *data,
-                                       size_t size);
+MXNET_DLL int MXNDArraySyncCopyFromCPU(NDArrayHandle handle, const void* data, size_t size);
 /*!
  * \brief Perform a synchronous copy to a contiguous CPU memory region.
  *
@@ -799,9 +797,7 @@ MXNET_DLL int MXNDArraySyncCopyFromCPU(NDArrayHandle handle,
  * \param data the data source to copy into.
  * \param size the memory size we want to copy into.
  */
-MXNET_DLL int MXNDArraySyncCopyToCPU(NDArrayHandle handle,
-                                     void *data,
-                                     size_t size);
+MXNET_DLL int MXNDArraySyncCopyToCPU(NDArrayHandle handle, void* data, size_t size);
 /*!
  * \brief Copy src.data() to dst.data() if i = -1, else dst.aux_data(i) if i >= 0
@@ -864,7 +860,7 @@ MXNET_DLL int MXNDArrayFree(NDArrayHandle handle);
 MXNET_DLL int MXNDArraySlice(NDArrayHandle handle,
                              uint32_t slice_begin,
                              uint32_t slice_end,
-                             NDArrayHandle *out);
+                             NDArrayHandle* out);
 
 /*!
  * \brief Slice the NDArray along axis 0.
@@ -879,7 +875,7 @@ MXNET_DLL int MXNDArraySlice(NDArrayHandle handle,
 MXNET_DLL int MXNDArraySlice64(NDArrayHandle handle,
                                int64_t slice_begin,
                                int64_t slice_end,
-                               NDArrayHandle *out);
+                               NDArrayHandle* out);
 
 /*!
  * \brief Index the NDArray along axis 0.
@@ -890,9 +886,7 @@ MXNET_DLL int MXNDArraySlice64(NDArrayHandle handle,
  * \param out The NDArrayHandle of output NDArray
  * \return 0 when success, -1 when failure happens
  */
-MXNET_DLL int MXNDArrayAt(NDArrayHandle handle,
-                          uint32_t idx,
-                          NDArrayHandle *out);
+MXNET_DLL int MXNDArrayAt(NDArrayHandle handle, uint32_t idx, NDArrayHandle* out);
 
 /*!
  * \brief Index the NDArray along axis 0.
@@ -903,15 +897,12 @@ MXNET_DLL int MXNDArrayAt(NDArrayHandle handle,
  * \param out The NDArrayHandle of output NDArray
  * \return 0 when success, -1 when failure happens
  */
-MXNET_DLL int MXNDArrayAt64(NDArrayHandle handle,
-                            int64_t idx,
-                            NDArrayHandle *out);
+MXNET_DLL int MXNDArrayAt64(NDArrayHandle handle, int64_t idx, NDArrayHandle* out);
 
 /*!
  * \brief get the storage type of the array
  */
-MXNET_DLL int MXNDArrayGetStorageType(NDArrayHandle handle,
-                                      int *out_storage_type);
+MXNET_DLL int MXNDArrayGetStorageType(NDArrayHandle handle, int* out_storage_type);
 
 /*!
  * \brief Reshape the NDArray.
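
Putting the creation and copy calls above together, a minimal round trip through a CPU NDArray; dev_type 1 and dtype 0 are assumed here to select cpu/float32, and size is taken as the element count:

#include <mxnet/c_api.h>
#include <stdio.h>

static int roundtrip(void) {
  const uint32_t shape[2] = {2, 3};
  float src[6] = {1, 2, 3, 4, 5, 6}, dst[6] = {0};
  NDArrayHandle arr = NULL;
  /* dev_type 1 / dtype 0 assumed to mean cpu / float32; delay_alloc = 0 */
  if (MXNDArrayCreate(shape, 2, 1, 0, 0, 0, &arr) != 0 ||
      MXNDArraySyncCopyFromCPU(arr, src, 6) != 0 ||   /* 6 elements in */
      MXNDArraySyncCopyToCPU(arr, dst, 6) != 0) {     /* 6 elements out */
    fprintf(stderr, "%s\n", MXGetLastError());
    return -1;
  }
  return MXNDArrayFree(arr);
}
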
@@ -921,10 +912,7 @@ MXNET_DLL int MXNDArrayGetStorageType(NDArrayHandle handle, * \param out the NDArrayHandle of reshaped NDArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle, - int ndim, - int *dims, - NDArrayHandle *out); +MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle, int ndim, int* dims, NDArrayHandle* out); /*! * \brief Reshape the NDArray. @@ -936,9 +924,9 @@ MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle, */ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, int ndim, - dim_t *dims, + dim_t* dims, bool reverse, - NDArrayHandle *out); + NDArrayHandle* out); /*! * \brief get the shape of the array @@ -949,9 +937,7 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, * \param out_pdata pointer holder to get data pointer of the shape * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, - int *out_dim, - const int **out_pdata); +MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, int* out_dim, const int** out_pdata); /*! * \brief get the shape of the array @@ -962,9 +948,7 @@ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, * \param out_pdata pointer holder to get data pointer of the shape * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetShape64(NDArrayHandle handle, - int *out_dim, - const int64_t **out_pdata); +MXNET_DLL int MXNDArrayGetShape64(NDArrayHandle handle, int* out_dim, const int64_t** out_pdata); /*! * \brief get the content of the data in NDArray @@ -972,37 +956,35 @@ MXNET_DLL int MXNDArrayGetShape64(NDArrayHandle handle, * \param out_pdata pointer holder to get pointer of data * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetData(NDArrayHandle handle, - void **out_pdata); -/*! -* \brief Create a reference view of NDArray that -* represents as DLManagedTensor -* Notice: MXNet uses asynchronous execution. Please call MXNDArrayWaitToRead or -* MXNDArrayWaitToWrite before calling MXNDArrayToDLPack. -* \param handle the handle to the ndarray -* \param out_dlpack pointer holder to get pointer of DLManagedTensor -* \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXNDArrayToDLPack(NDArrayHandle handle, - DLManagedTensorHandle *out_dlpack); - -/*! -* \brief Create a NDArray backed by a dlpack tensor. -* -* This allows us to create a NDArray using the memory -* allocated by an external deep learning framework -* that is DLPack compatible. -* -* The memory is retained until the NDArray went out of scope. -* -* \param dlpack the pointer of the input DLManagedTensor -* \param transient_handle whether the handle will be destructed before calling the deleter -* \param out_handle pointer holder to get pointer of NDArray -* \return 0 when success, -1 when failure happens -*/ +MXNET_DLL int MXNDArrayGetData(NDArrayHandle handle, void** out_pdata); +/*! + * \brief Create a reference view of NDArray that + * represents as DLManagedTensor + * Notice: MXNet uses asynchronous execution. Please call MXNDArrayWaitToRead or + * MXNDArrayWaitToWrite before calling MXNDArrayToDLPack. + * \param handle the handle to the ndarray + * \param out_dlpack pointer holder to get pointer of DLManagedTensor + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXNDArrayToDLPack(NDArrayHandle handle, DLManagedTensorHandle* out_dlpack); + +/*! + * \brief Create a NDArray backed by a dlpack tensor. 
+ * + * This allows us to create a NDArray using the memory + * allocated by an external deep learning framework + * that is DLPack compatible. + * + * The memory is retained until the NDArray went out of scope. + * + * \param dlpack the pointer of the input DLManagedTensor + * \param transient_handle whether the handle will be destructed before calling the deleter + * \param out_handle pointer holder to get pointer of NDArray + * \return 0 when success, -1 when failure happens + */ MXNET_DLL int MXNDArrayFromDLPack(DLManagedTensorHandle dlpack, const bool transient_handle, - NDArrayHandle *out_handle); + NDArrayHandle* out_handle); /*! * \brief Delete a dlpack tensor @@ -1017,8 +999,7 @@ MXNET_DLL int MXNDArrayCallDLPackDeleter(DLManagedTensorHandle dlpack); * \param out_dtype pointer holder to get type of data * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetDType(NDArrayHandle handle, - int *out_dtype); +MXNET_DLL int MXNDArrayGetDType(NDArrayHandle handle, int* out_dtype); /*! * \brief get the type of the ith aux data in NDArray @@ -1029,9 +1010,7 @@ MXNET_DLL int MXNDArrayGetDType(NDArrayHandle handle, * \param out_type pointer holder to get type of aux data * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetAuxType(NDArrayHandle handle, - uint32_t i, - int *out_type); +MXNET_DLL int MXNDArrayGetAuxType(NDArrayHandle handle, uint32_t i, int* out_type); /*! * \brief get the type of the ith aux data in NDArray @@ -1042,9 +1021,7 @@ MXNET_DLL int MXNDArrayGetAuxType(NDArrayHandle handle, * \param out_type pointer holder to get type of aux data * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetAuxType64(NDArrayHandle handle, - int64_t i, - int *out_type); +MXNET_DLL int MXNDArrayGetAuxType64(NDArrayHandle handle, int64_t i, int* out_type); /*! * \brief Get a deep copy of the ith aux data blob @@ -1053,9 +1030,7 @@ MXNET_DLL int MXNDArrayGetAuxType64(NDArrayHandle handle, * in the form of an NDArray of default storage type. * This function blocks. Do not use it in performance critical code. */ -MXNET_DLL int MXNDArrayGetAuxNDArray(NDArrayHandle handle, - uint32_t i, - NDArrayHandle *out); +MXNET_DLL int MXNDArrayGetAuxNDArray(NDArrayHandle handle, uint32_t i, NDArrayHandle* out); /*! * \brief Get a deep copy of the ith aux data blob @@ -1064,17 +1039,14 @@ MXNET_DLL int MXNDArrayGetAuxNDArray(NDArrayHandle handle, * in the form of an NDArray of default storage type. * This function blocks. Do not use it in performance critical code. */ -MXNET_DLL int MXNDArrayGetAuxNDArray64(NDArrayHandle handle, - int64_t i, - NDArrayHandle *out); +MXNET_DLL int MXNDArrayGetAuxNDArray64(NDArrayHandle handle, int64_t i, NDArrayHandle* out); /*! * \brief Get a deep copy of the data blob * in the form of an NDArray of default storage type. * This function blocks. Do not use it in performance critical code. */ -MXNET_DLL int MXNDArrayGetDataNDArray(NDArrayHandle handle, - NDArrayHandle *out); +MXNET_DLL int MXNDArrayGetDataNDArray(NDArrayHandle handle, NDArrayHandle* out); /*! 
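
A minimal sketch of the DLPack round trip described above; per the note on asynchronous execution, callers are expected to synchronize (e.g. via MXNDArrayWaitToRead) before handing the tensor to another framework:

#include <mxnet/c_api.h>
#include <stdbool.h>

/* Export an NDArray as a DLManagedTensor and re-import it.
 * transient_handle = false: the exported handle is assumed to stay valid
 * until its deleter runs (MXNDArrayCallDLPackDeleter can invoke it). */
static int dlpack_roundtrip(NDArrayHandle src, NDArrayHandle* dst) {
  DLManagedTensorHandle pack = NULL;
  if (MXNDArrayToDLPack(src, &pack) != 0)
    return -1;
  return MXNDArrayFromDLPack(pack, false, dst);
}

/*!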
* \brief get the context of the NDArray * \param handle the handle to the narray @@ -1082,21 +1054,19 @@ MXNET_DLL int MXNDArrayGetDataNDArray(NDArrayHandle handle, * \param out_dev_id the output device id * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetContext(NDArrayHandle handle, - int *out_dev_type, - int *out_dev_id); +MXNET_DLL int MXNDArrayGetContext(NDArrayHandle handle, int* out_dev_type, int* out_dev_id); /*! * \brief return gradient buffer attached to this NDArray * \param handle NDArray handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetGrad(NDArrayHandle handle, NDArrayHandle *out); +MXNET_DLL int MXNDArrayGetGrad(NDArrayHandle handle, NDArrayHandle* out); /*! * \brief detach and ndarray from computation graph by clearing entry_ * \param handle NDArray handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayDetach(NDArrayHandle handle, NDArrayHandle *out); +MXNET_DLL int MXNDArrayDetach(NDArrayHandle handle, NDArrayHandle* out); /*! * \brief set the flag for gradient array state. * \param handle NDArray handle @@ -1110,7 +1080,7 @@ MXNET_DLL int MXNDArraySetGradState(NDArrayHandle handle, int state); * \param state the new state. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayGetGradState(NDArrayHandle handle, int *out); +MXNET_DLL int MXNDArrayGetGradState(NDArrayHandle handle, int* out); //-------------------------------- // Part 2: functions on NDArray //-------------------------------- @@ -1121,8 +1091,7 @@ MXNET_DLL int MXNDArrayGetGradState(NDArrayHandle handle, int *out); * \param out_array the output function array * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXListFunctions(uint32_t *out_size, - FunctionHandle **out_array); +MXNET_DLL int MXListFunctions(uint32_t* out_size, FunctionHandle** out_array); /*! * \brief get the function handle by name @@ -1130,8 +1099,7 @@ MXNET_DLL int MXListFunctions(uint32_t *out_size, * \param out the corresponding function handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXGetFunction(const char *name, - FunctionHandle *out); +MXNET_DLL int MXGetFunction(const char* name, FunctionHandle* out); /*! * \brief Get the information of the function handle. * \param fun The function handle. @@ -1145,13 +1113,13 @@ MXNET_DLL int MXGetFunction(const char *name, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXFuncGetInfo(FunctionHandle fun, - const char **name, - const char **description, - uint32_t *num_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions, - const char **return_type DEFAULT(NULL)); + const char** name, + const char** description, + uint32_t* num_args, + const char*** arg_names, + const char*** arg_type_infos, + const char*** arg_descriptions, + const char** return_type DEFAULT(NULL)); /*! * \brief get the argument requirements of the function * \param fun input function handle @@ -1163,10 +1131,10 @@ MXNET_DLL int MXFuncGetInfo(FunctionHandle fun, * \sa MXFuncInvoke */ MXNET_DLL int MXFuncDescribe(FunctionHandle fun, - uint32_t *num_use_vars, - uint32_t *num_scalars, - uint32_t *num_mutate_vars, - int *type_mask); + uint32_t* num_use_vars, + uint32_t* num_scalars, + uint32_t* num_mutate_vars, + int* type_mask); /*! 
* \brief invoke a function, the array size of passed in arguments * must match the values in the @@ -1181,12 +1149,12 @@ MXNET_DLL int MXFuncDescribe(FunctionHandle fun, * \sa MXFuncDescribeArgs */ MXNET_DLL int MXFuncInvoke(FunctionHandle fun, - NDArrayHandle *use_vars, - float *scalar_args, - NDArrayHandle *mutate_vars, + NDArrayHandle* use_vars, + float* scalar_args, + NDArrayHandle* mutate_vars, int num_params, - char **param_keys, - char **param_vals); + char** param_keys, + char** param_vals); /*! * \brief invoke a nnvm op and imperative function * \param creator the op @@ -1202,13 +1170,13 @@ MXNET_DLL int MXFuncInvoke(FunctionHandle fun, */ MXNET_DLL int MXImperativeInvoke(AtomicSymbolCreator creator, int num_inputs, - NDArrayHandle *inputs, - int *num_outputs, - NDArrayHandle **outputs, + NDArrayHandle* inputs, + int* num_outputs, + NDArrayHandle** outputs, int num_params, - const char **param_keys, - const char **param_vals, - const int **out_stypes); + const char** param_keys, + const char** param_vals, + const int** out_stypes); /*! * \brief set whether to record operator for autograd * \param is_recording 1 when recording, 0 when not recording. @@ -1270,25 +1238,23 @@ MXNET_DLL int MXSetIsNumpyDefaultDtype(bool dtype_flag, bool* prev); * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXAutogradMarkVariables(uint32_t num_var, - NDArrayHandle *var_handles, - uint32_t *reqs_array, - NDArrayHandle *grad_handles); + NDArrayHandle* var_handles, + uint32_t* reqs_array, + NDArrayHandle* grad_handles); /*! * \brief unmark nonleaf NDArrays to free the memory * \param num_var number of variable NDArrays * \param var_handles variable NDArrays * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXAutogradDropGrads(uint32_t num_var, - NDArrayHandle *var_handles); +MXNET_DLL int MXAutogradDropGrads(uint32_t num_var, NDArrayHandle* var_handles); /*! * \brief compute the gradient of outputs w.r.t variabels * \param num_output number of output NDArray * \param output_handles output NDArrays * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXAutogradComputeGradient(uint32_t num_output, - NDArrayHandle* output_handles); +MXNET_DLL int MXAutogradComputeGradient(uint32_t num_output, NDArrayHandle* output_handles); /*! * \brief compute the gradient of outputs w.r.t variabels * \param num_output number of output NDArray @@ -1313,21 +1279,21 @@ MXNET_DLL int MXAutogradBackward(uint32_t num_output, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXAutogradBackwardEx(uint32_t num_output, - NDArrayHandle *output_handles, - NDArrayHandle *ograd_handles, + NDArrayHandle* output_handles, + NDArrayHandle* ograd_handles, uint32_t num_variables, - NDArrayHandle *var_handles, + NDArrayHandle* var_handles, int retain_graph, int create_graph, int is_train, - NDArrayHandle **grad_handles, - int **grad_stypes); + NDArrayHandle** grad_handles, + int** grad_stypes); /* * \brief get the graph constructed by autograd. * \param handle ndarray handle * \param out output symbol handle */ -MXNET_DLL int MXAutogradGetSymbol(NDArrayHandle handle, SymbolHandle *out); +MXNET_DLL int MXAutogradGetSymbol(NDArrayHandle handle, SymbolHandle* out); /*! * \brief create cached operator, allows to choose thread_safe version @@ -1337,7 +1303,7 @@ MXNET_DLL int MXCreateCachedOp(SymbolHandle handle, int num_flags, const char** keys, const char** vals, - CachedOpHandle *out, + CachedOpHandle* out, bool thread_safe DEFAULT(false)); /*! 
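
A sketch of the cached-op life cycle around MXCreateCachedOp; default_dev_type 1 / dev_id 0 are assumed to select cpu(0), and no optimization flags are passed:

#include <mxnet/c_api.h>
#include <stdbool.h>

/* Build a CachedOp from a composed symbol and run it once. */
static int run_cached(SymbolHandle sym, int nin, NDArrayHandle* in) {
  CachedOpHandle op = NULL;
  int nout = 0;
  NDArrayHandle* out = NULL;        /* allocated by the library */
  const int* out_stypes = NULL;
  if (MXCreateCachedOp(sym, 0, NULL, NULL, &op, false) != 0)  /* no flags */
    return -1;
  if (MXInvokeCachedOp(op, nin, in, 1, 0, &nout, &out, &out_stypes) != 0)
    return -1;
  return MXFreeCachedOp(op);
}
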
@@ -1348,8 +1314,7 @@ MXNET_DLL int MXFreeCachedOp(CachedOpHandle handle); /*! * \brief get optimized graph from the cached op */ -MXNET_DLL int MXCachedOpGetOptimizedSymbol(CachedOpHandle handle, - SymbolHandle *out); +MXNET_DLL int MXCachedOpGetOptimizedSymbol(CachedOpHandle handle, SymbolHandle* out); /*! * \brief invoke a cached op @@ -1365,11 +1330,11 @@ MXNET_DLL int MXCachedOpGetOptimizedSymbol(CachedOpHandle handle, */ MXNET_DLL int MXInvokeCachedOp(CachedOpHandle handle, int num_inputs, - NDArrayHandle *inputs, + NDArrayHandle* inputs, int default_dev_type, int default_dev_id, - int *num_outputs, - NDArrayHandle **outputs, + int* num_outputs, + NDArrayHandle** outputs, const int** out_stypes); /*! @@ -1384,7 +1349,7 @@ MXNET_DLL int MXCachedOpRegisterOpHook(CachedOpHandle handle, * \param curr returns the current status. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayIsDeferredCompute(int *curr); +MXNET_DLL int MXNDArrayIsDeferredCompute(int* curr); /*! * \brief set whether to enable deferred compute mode @@ -1392,7 +1357,7 @@ MXNET_DLL int MXNDArrayIsDeferredCompute(int *curr); * \param prev returns the previous status before this set. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArraySetIsDeferredCompute(int deferred_compute_enabled, int *prev); +MXNET_DLL int MXNDArraySetIsDeferredCompute(int deferred_compute_enabled, int* prev); /*! * \brief Associate variables with deferred compute arrays @@ -1401,8 +1366,8 @@ MXNET_DLL int MXNDArraySetIsDeferredCompute(int deferred_compute_enabled, int *p * \param num number of arrays and variables respectively * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArraySetDeferredComputeVariable(NDArrayHandle *arrays, - SymbolHandle *variables, +MXNET_DLL int MXNDArraySetDeferredComputeVariable(NDArrayHandle* arrays, + SymbolHandle* variables, int num); /*! @@ -1413,9 +1378,9 @@ MXNET_DLL int MXNDArraySetDeferredComputeVariable(NDArrayHandle *arrays, * Construct a Symbol for the deferred computation graph. output_handles * specifies the outputs of interest which the returned symbol will compute. */ -MXNET_DLL int MXNDArrayGetDeferredComputeSymbol(NDArrayHandle *output_handles, +MXNET_DLL int MXNDArrayGetDeferredComputeSymbol(NDArrayHandle* output_handles, int num_outputs, - SymbolHandle *out); + SymbolHandle* out); /*! * \brief Clear the deferred compute info associated with the ndarrays. @@ -1423,7 +1388,7 @@ MXNET_DLL int MXNDArrayGetDeferredComputeSymbol(NDArrayHandle *output_handles, * \param num number of ndarrays * \return 0 when success, -1 otherwise */ -MXNET_DLL int MXNDArrayClearDeferredCompute(NDArrayHandle *arrays, int num); +MXNET_DLL int MXNDArrayClearDeferredCompute(NDArrayHandle* arrays, int num); //-------------------------------------------- // Part 3: symbolic configuration generation @@ -1434,8 +1399,7 @@ MXNET_DLL int MXNDArrayClearDeferredCompute(NDArrayHandle *arrays, int num); * \param out_array the output operator name array. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXListAllOpNames(uint32_t *out_size, - const char ***out_array); +MXNET_DLL int MXListAllOpNames(uint32_t* out_size, const char*** out_array); /*! 
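
MXListAllOpNames is the usual entry point for discovering operators; a short sketch (the returned string array is assumed to be owned by the library and must not be freed):

#include <mxnet/c_api.h>
#include <stdio.h>

static void dump_op_names(void) {
  uint32_t n = 0;
  const char** names = NULL;   /* assumed owned by the library */
  if (MXListAllOpNames(&n, &names) == 0)
    for (uint32_t i = 0; i < n; ++i)
      printf("%s\n", names[i]);
}

/*!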
* \brief list all the available AtomicSymbolEntry @@ -1443,16 +1407,14 @@ MXNET_DLL int MXListAllOpNames(uint32_t *out_size, * \param out_array the output AtomicSymbolCreator array * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolListAtomicSymbolCreators(uint32_t *out_size, - AtomicSymbolCreator **out_array); +MXNET_DLL int MXSymbolListAtomicSymbolCreators(uint32_t* out_size, AtomicSymbolCreator** out_array); /*! * \brief Get the name of an atomic symbol. * \param creator the AtomicSymbolCreator. * \param name The returned name of the creator. */ -MXNET_DLL int MXSymbolGetAtomicSymbolName(AtomicSymbolCreator creator, - const char **name); +MXNET_DLL int MXSymbolGetAtomicSymbolName(AtomicSymbolCreator creator, const char** name); /*! * \brief Get the input symbols of the graph. @@ -1460,8 +1422,7 @@ MXNET_DLL int MXSymbolGetAtomicSymbolName(AtomicSymbolCreator creator, * \param inputs The input symbols of the graph. * \param input_size the number of input symbols returned. */ -MXNET_DLL int MXSymbolGetInputSymbols(SymbolHandle sym, SymbolHandle **inputs, - int *input_size); +MXNET_DLL int MXSymbolGetInputSymbols(SymbolHandle sym, SymbolHandle** inputs, int* input_size); /*! * \brief Cut a subgraph whose nodes are marked with a subgraph attribute. @@ -1472,8 +1433,7 @@ MXNET_DLL int MXSymbolGetInputSymbols(SymbolHandle sym, SymbolHandle **inputs, * \param inputs The nodes that connect to the subgraph. * \param input_size The number of such nodes. */ -MXNET_DLL int MXSymbolCutSubgraph(SymbolHandle sym, SymbolHandle **inputs, - int *input_size); +MXNET_DLL int MXSymbolCutSubgraph(SymbolHandle sym, SymbolHandle** inputs, int* input_size); /*! * \brief Get the detailed information about atomic symbol. @@ -1493,14 +1453,14 @@ MXNET_DLL int MXSymbolCutSubgraph(SymbolHandle sym, SymbolHandle **inputs, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolGetAtomicSymbolInfo(AtomicSymbolCreator creator, - const char **name, - const char **description, - uint32_t *num_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions, - const char **key_var_num_args, - const char **return_type DEFAULT(NULL)); + const char** name, + const char** description, + uint32_t* num_args, + const char*** arg_names, + const char*** arg_type_infos, + const char*** arg_descriptions, + const char** key_var_num_args, + const char** return_type DEFAULT(NULL)); /*! * \brief Create an AtomicSymbol. * @@ -1516,16 +1476,16 @@ MXNET_DLL int MXSymbolGetAtomicSymbolInfo(AtomicSymbolCreator creator, */ MXNET_DLL int MXSymbolCreateAtomicSymbol(AtomicSymbolCreator creator, uint32_t num_param, - const char **keys, - const char **vals, - SymbolHandle *out); + const char** keys, + const char** vals, + SymbolHandle* out); /*! * \brief Create a Variable Symbol. * \param name name of the variable * \param out pointer to the created symbol handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolCreateVariable(const char *name, SymbolHandle *out); +MXNET_DLL int MXSymbolCreateVariable(const char* name, SymbolHandle* out); /*! 
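
A small sketch tying MXSymbolCreateVariable to the serialization calls declared further below (the returned JSON buffer is assumed to be managed by the library):

#include <mxnet/c_api.h>
#include <stdio.h>

static int show_variable(void) {
  SymbolHandle v = NULL;
  const char* json = NULL;     /* assumed managed by the library */
  if (MXSymbolCreateVariable("data", &v) != 0 ||
      MXSymbolSaveToJSON(v, &json) != 0)
    return -1;
  printf("%s\n", json);
  return MXSymbolFree(v);
}

/*!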
* \brief Create a Symbol by grouping list of symbols together * \param num_symbols number of symbols to be grouped @@ -1533,23 +1493,21 @@ MXNET_DLL int MXSymbolCreateVariable(const char *name, SymbolHandle *out); * \param out pointer to the created symbol handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolCreateGroup(uint32_t num_symbols, - SymbolHandle *symbols, - SymbolHandle *out); +MXNET_DLL int MXSymbolCreateGroup(uint32_t num_symbols, SymbolHandle* symbols, SymbolHandle* out); /*! * \brief Load a symbol from a json file. * \param fname the file name. * \param out the output symbol. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolCreateFromFile(const char *fname, SymbolHandle *out); +MXNET_DLL int MXSymbolCreateFromFile(const char* fname, SymbolHandle* out); /*! * \brief Load a symbol from a json string. * \param json the json string. * \param out the output symbol. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolCreateFromJSON(const char *json, SymbolHandle *out); +MXNET_DLL int MXSymbolCreateFromJSON(const char* json, SymbolHandle* out); /*! * \brief Remove the operators amp_cast and amp_multicast * \param sym_handle the input symbol. @@ -1563,14 +1521,14 @@ MXNET_DLL int MXSymbolRemoveAmpCast(SymbolHandle sym_handle, SymbolHandle* ret_s * \param fname the file name. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolSaveToFile(SymbolHandle symbol, const char *fname); +MXNET_DLL int MXSymbolSaveToFile(SymbolHandle symbol, const char* fname); /*! * \brief Save a symbol into a json string * \param symbol the input symbol. * \param out_json output json string. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolSaveToJSON(SymbolHandle symbol, const char **out_json); +MXNET_DLL int MXSymbolSaveToJSON(SymbolHandle symbol, const char** out_json); /*! * \brief Free the symbol handle. * \param symbol the symbol @@ -1583,14 +1541,14 @@ MXNET_DLL int MXSymbolFree(SymbolHandle symbol); * \param out used to hold the result of copy * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolCopy(SymbolHandle symbol, SymbolHandle *out); +MXNET_DLL int MXSymbolCopy(SymbolHandle symbol, SymbolHandle* out); /*! * \brief Print the content of symbol, used for debug. * \param symbol the symbol * \param out_str pointer to hold the output string of the printing. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolPrint(SymbolHandle symbol, const char **out_str); +MXNET_DLL int MXSymbolPrint(SymbolHandle symbol, const char** out_str); /*! * \brief Get string name from symbol * \param symbol the source symbol @@ -1598,9 +1556,7 @@ MXNET_DLL int MXSymbolPrint(SymbolHandle symbol, const char **out_str); * \param success Whether the result is contained in out. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetName(SymbolHandle symbol, - const char** out, - int *success); +MXNET_DLL int MXSymbolGetName(SymbolHandle symbol, const char** out, int* success); /*! * \brief Get string attribute from symbol * \param symbol the source symbol @@ -1609,13 +1565,11 @@ MXNET_DLL int MXSymbolGetName(SymbolHandle symbol, * \param success Whether the result is contained in out. 
* \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetAttr(SymbolHandle symbol, - const char* key, - const char** out, - int *success); +MXNET_DLL int MXSymbolGetAttr(SymbolHandle symbol, const char* key, const char** out, int* success); /*! * \brief Set string attribute from symbol. - * NOTE: Setting attribute to a symbol can affect the semantics(mutable/immutable) of symbolic graph. + * NOTE: Setting attribute to a symbol can affect the semantics(mutable/immutable) of symbolic + * graph. * * Safe recommendaton: use immutable graph * - Only allow set attributes during creation of new symbol as optional parameter @@ -1629,9 +1583,7 @@ MXNET_DLL int MXSymbolGetAttr(SymbolHandle symbol, * \param value The value to be saved. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolSetAttr(SymbolHandle symbol, - const char* key, - const char* value); +MXNET_DLL int MXSymbolSetAttr(SymbolHandle symbol, const char* key, const char* value); /*! * \brief Get all attributes from symbol, including all descendents. * \param symbol the source symbol @@ -1639,9 +1591,7 @@ MXNET_DLL int MXSymbolSetAttr(SymbolHandle symbol, * \param out 2*out_size strings representing key value pairs. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolListAttr(SymbolHandle symbol, - uint32_t *out_size, - const char*** out); +MXNET_DLL int MXSymbolListAttr(SymbolHandle symbol, uint32_t* out_size, const char*** out); /*! * \brief Get all attributes from symbol, excluding descendents. * \param symbol the source symbol @@ -1649,9 +1599,7 @@ MXNET_DLL int MXSymbolListAttr(SymbolHandle symbol, * \param out 2*out_size strings representing key value pairs. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolListAttrShallow(SymbolHandle symbol, - uint32_t *out_size, - const char*** out); +MXNET_DLL int MXSymbolListAttrShallow(SymbolHandle symbol, uint32_t* out_size, const char*** out); /*! * \brief List arguments in the symbol. * \param symbol the symbol @@ -1660,8 +1608,8 @@ MXNET_DLL int MXSymbolListAttrShallow(SymbolHandle symbol, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolListArguments(SymbolHandle symbol, - uint32_t *out_size, - const char ***out_str_array); + uint32_t* out_size, + const char*** out_str_array); /*! * \brief List returns in the symbol. @@ -1671,8 +1619,8 @@ MXNET_DLL int MXSymbolListArguments(SymbolHandle symbol, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolListOutputs(SymbolHandle symbol, - uint32_t *out_size, - const char ***out_str_array); + uint32_t* out_size, + const char*** out_str_array); /*! * \brief Get number of outputs of the symbol. @@ -1680,8 +1628,7 @@ MXNET_DLL int MXSymbolListOutputs(SymbolHandle symbol, * \param out_size number of outputs * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetNumOutputs(SymbolHandle symbol, - uint32_t *output_count); +MXNET_DLL int MXSymbolGetNumOutputs(SymbolHandle symbol, uint32_t* output_count); /*! * \brief Get a symbol that contains all the internals. @@ -1689,24 +1636,21 @@ MXNET_DLL int MXSymbolGetNumOutputs(SymbolHandle symbol, * \param out The output symbol whose outputs are all the internals. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetInternals(SymbolHandle symbol, - SymbolHandle *out); +MXNET_DLL int MXSymbolGetInternals(SymbolHandle symbol, SymbolHandle* out); /*! * \brief Get a symbol that contains all the inputs. 
* \param symbol The symbol * \param out The output symbol whose outputs are all the internals. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetInputs(SymbolHandle symbol, - SymbolHandle *out); +MXNET_DLL int MXSymbolGetInputs(SymbolHandle symbol, SymbolHandle* out); /*! * \brief Get a symbol that contains only direct children. * \param symbol The symbol * \param out The output symbol whose outputs are the direct children. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetChildren(SymbolHandle symbol, - SymbolHandle *out); +MXNET_DLL int MXSymbolGetChildren(SymbolHandle symbol, SymbolHandle* out); /*! * \brief Get index-th outputs of the symbol. * \param symbol The symbol @@ -1714,9 +1658,7 @@ MXNET_DLL int MXSymbolGetChildren(SymbolHandle symbol, * \param out The output symbol whose outputs are the index-th symbol. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGetOutput(SymbolHandle symbol, - uint32_t index, - SymbolHandle *out); +MXNET_DLL int MXSymbolGetOutput(SymbolHandle symbol, uint32_t index, SymbolHandle* out); /*! * \brief List auxiliary states in the symbol. @@ -1726,8 +1668,8 @@ MXNET_DLL int MXSymbolGetOutput(SymbolHandle symbol, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolListAuxiliaryStates(SymbolHandle symbol, - uint32_t *out_size, - const char ***out_str_array); + uint32_t* out_size, + const char*** out_str_array); /*! * \brief Compose the symbol on other symbols. @@ -1744,7 +1686,7 @@ MXNET_DLL int MXSymbolListAuxiliaryStates(SymbolHandle symbol, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolCompose(SymbolHandle sym, - const char *name, + const char* name, uint32_t num_args, const char** keys, SymbolHandle* args); @@ -1757,97 +1699,82 @@ MXNET_DLL int MXSymbolCompose(SymbolHandle sym, * \param out the returned symbol that has gradient * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXSymbolGrad(SymbolHandle sym, - uint32_t num_wrt, - const char** wrt, - SymbolHandle* out); +MXNET_DLL int MXSymbolGrad(SymbolHandle sym, uint32_t num_wrt, const char** wrt, SymbolHandle* out); /*! * \brief infer shape of unknown input shapes given the known one. * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data - * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional. - * This api is available when MXNet is built with flag - * USE_INT64_TENSOR_SIZE=0 (by default) - * \param sym symbol handle - * \param num_args number of input arguments. - * \param keys the key of keyword args (optional) - * \param arg_ind_ptr the head pointer of the rows in CSR - * \param arg_shape_data the content of the CSR - * \param in_shape_size sizeof the returning array of in_shapes - * \param in_shape_ndim returning array of shape dimensions of eachs input shape. - * \param in_shape_data returning array of pointers to head of the input shape. - * \param out_shape_size sizeof the returning array of out_shapes - * \param out_shape_ndim returning array of shape dimensions of each output shape. - * \param out_shape_data returning array of pointers to head of the output shape. - * \param aux_shape_size sizeof the returning array of aux_shapes - * \param aux_shape_ndim returning array of shape dimensions of each auxiliary shape. - * \param aux_shape_data returning array of pointers to head of the auxiliary shape. 
- * \param complete whether infer shape completes or more information is needed.
- * \return 0 when success, -1 when failure happens
+ * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is
+ * positional. This api is available when MXNet is built with flag
+ * USE_INT64_TENSOR_SIZE=0 (by default)
+ * \param sym symbol handle
+ * \param num_args number of input arguments.
+ * \param keys the key of keyword args (optional)
+ * \param arg_ind_ptr the head pointer of the rows in CSR
+ * \param arg_shape_data the content of the CSR
+ * \param in_shape_size sizeof the returning array of in_shapes
+ * \param in_shape_ndim returning array of shape dimensions of each input shape.
+ * \param in_shape_data returning array of pointers to head of the input shape.
+ * \param out_shape_size sizeof the returning array of out_shapes
+ * \param out_shape_ndim returning array of shape dimensions of each output shape.
+ * \param out_shape_data returning array of pointers to head of the output shape.
+ * \param aux_shape_size sizeof the returning array of aux_shapes
+ * \param aux_shape_ndim returning array of shape dimensions of each auxiliary shape.
+ * \param aux_shape_data returning array of pointers to head of the auxiliary shape.
+ * \param complete whether infer shape completes or more information is needed.
+ * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXSymbolInferShape(SymbolHandle sym,
                                  uint32_t num_args,
                                  const char** keys,
-                                 const uint32_t *arg_ind_ptr,
-                                 const int *arg_shape_data,
-                                 uint32_t *in_shape_size,
-                                 const int **in_shape_ndim,
-                                 const int ***in_shape_data,
-                                 uint32_t *out_shape_size,
-                                 const int **out_shape_ndim,
-                                 const int ***out_shape_data,
-                                 uint32_t *aux_shape_size,
-                                 const int **aux_shape_ndim,
-                                 const int ***aux_shape_data,
-                                 int *complete);
+                                 const uint32_t* arg_ind_ptr,
+                                 const int* arg_shape_data,
+                                 uint32_t* in_shape_size,
+                                 const int** in_shape_ndim,
+                                 const int*** in_shape_data,
+                                 uint32_t* out_shape_size,
+                                 const int** out_shape_ndim,
+                                 const int*** out_shape_data,
+                                 uint32_t* aux_shape_size,
+                                 const int** aux_shape_ndim,
+                                 const int*** aux_shape_data,
+                                 int* complete);
 
 /*!
  * \brief infer shape of unknown input shapes given the known one.
  *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
- * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional.
- * This api is available when MXNet is built with flag
- * USE_INT64_TENSOR_SIZE=1 (not default) i.e. Large Tensor Support
- * \param sym symbol handle
- * \param num_args number of input arguments.
- * \param keys the key of keyword args (optional)
- * \param arg_ind_ptr the head pointer of the rows in CSR
- * \param arg_shape_data the content of the CSR
- * \param in_shape_size sizeof the returning array of in_shapes
- * \param in_shape_ndim returning array of shape dimensions of each input shape.
- * \param in_shape_data returning array of pointers to head of the input shape.
- * \param out_shape_size sizeof the returning array of out_shapes
- * \param out_shape_ndim returning array of shape dimensions of each output shape.
- * \param out_shape_data returning array of pointers to head of the output shape.
- * \param aux_shape_size sizeof the returning array of aux_shapes
- * \param aux_shape_ndim returning array of shape dimensions of each auxiliary shape.
- * \param aux_shape_data returning array of pointers to head of the auxiliary shape.
- * \param complete whether infer shape completes or more information is needed.
- * \return 0 when success, -1 when failure happens
+ * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is
+ * positional. This api is available when MXNet is built with flag
+ * USE_INT64_TENSOR_SIZE=1 (not default) i.e. Large Tensor Support
+ * \param sym symbol handle
+ * \param num_args number of input arguments.
+ * \param keys the key of keyword args (optional)
+ * \param arg_ind_ptr the head pointer of the rows in CSR
+ * \param arg_shape_data the content of the CSR
+ * \param in_shape_size sizeof the returning array of in_shapes
+ * \param in_shape_ndim returning array of shape dimensions of each input shape.
+ * \param in_shape_data returning array of pointers to head of the input shape.
+ * \param out_shape_size sizeof the returning array of out_shapes
+ * \param out_shape_ndim returning array of shape dimensions of each output shape.
+ * \param out_shape_data returning array of pointers to head of the output shape.
+ * \param aux_shape_size sizeof the returning array of aux_shapes
+ * \param aux_shape_ndim returning array of shape dimensions of each auxiliary shape.
+ * \param aux_shape_data returning array of pointers to head of the auxiliary shape.
+ * \param complete whether infer shape completes or more information is needed.
+ * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXSymbolInferShape64(SymbolHandle sym,
                                    uint32_t num_args,
                                    const char** keys,
-                                   const int64_t *arg_ind_ptr,
-                                   const int64_t *arg_shape_data,
-                                   size_t *in_shape_size,
-                                   const int **in_shape_ndim,
-                                   const int64_t ***in_shape_data,
-                                   size_t *out_shape_size,
-                                   const int **out_shape_ndim,
-                                   const int64_t ***out_shape_data,
-                                   size_t *aux_shape_size,
-                                   const int **aux_shape_ndim,
-                                   const int64_t ***aux_shape_data,
-                                   int *complete);
+                                   const int64_t* arg_ind_ptr,
+                                   const int64_t* arg_shape_data,
+                                   size_t* in_shape_size,
+                                   const int** in_shape_ndim,
+                                   const int64_t*** in_shape_data,
+                                   size_t* out_shape_size,
+                                   const int** out_shape_ndim,
+                                   const int64_t*** out_shape_data,
+                                   size_t* aux_shape_size,
+                                   const int** aux_shape_ndim,
+                                   const int64_t*** aux_shape_data,
+                                   int* complete);
 
 /*!
  * \brief partially infer shape of unknown input shapes given the known one.
  *
  *  Return partially inferred results if not all shapes could be inferred.
  *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
- * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional.
- * This api is available when MXNet is built with flag
- * USE_INT64_TENSOR_SIZE=0 (by default)
+ * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is
+ * positional. This api is available when MXNet is built with flag USE_INT64_TENSOR_SIZE=0 (by
+ * default)
  *
  * \param sym symbol handle
  * \param num_args number of input arguments.
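
A worked example of the CSR packing shared by the MXSymbolInferShape family: for two positional inputs where arg 0 has shape (2, 3) and arg 1 is unknown, arg_ind_ptr holds num_args + 1 row offsets into arg_shape_data:

#include <mxnet/c_api.h>

static int infer(SymbolHandle sym) {
  /* positional call: keys == NULL; rows 0..1 of the CSR cover the two args */
  const uint32_t arg_ind_ptr[3] = {0, 2, 2};   /* arg0 -> [0,2), arg1 -> empty */
  const int arg_shape_data[2]   = {2, 3};      /* the two dimensions of arg0 */
  uint32_t in_n = 0, out_n = 0, aux_n = 0;
  const int *in_ndim = NULL, *out_ndim = NULL, *aux_ndim = NULL;
  const int **in_data = NULL, **out_data = NULL, **aux_data = NULL;
  int complete = 0;
  return MXSymbolInferShape(sym, 2, NULL, arg_ind_ptr, arg_shape_data,
                            &in_n, &in_ndim, &in_data,
                            &out_n, &out_ndim, &out_data,
                            &aux_n, &aux_ndim, &aux_data, &complete);
}
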
@@ -1869,27 +1796,27 @@ MXNET_DLL int MXSymbolInferShape64(SymbolHandle sym, MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym, uint32_t num_args, const char** keys, - const uint32_t *arg_ind_ptr, - const int *arg_shape_data, - uint32_t *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, - uint32_t *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, - uint32_t *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, - int *complete); + const uint32_t* arg_ind_ptr, + const int* arg_shape_data, + uint32_t* in_shape_size, + const int** in_shape_ndim, + const int*** in_shape_data, + uint32_t* out_shape_size, + const int** out_shape_ndim, + const int*** out_shape_data, + uint32_t* aux_shape_size, + const int** aux_shape_ndim, + const int*** aux_shape_data, + int* complete); /*! * \brief partially infer shape of unknown input shapes given the known one. * * Return partially inferred results if not all shapes could be inferred. * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data - * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional. - * This api is available when MXNet is built with flag - * USE_INT64_TENSOR_SIZE=1 (not default) i.e. Large Tensor Support + * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is + * positional. This api is available when MXNet is built with flag USE_INT64_TENSOR_SIZE=1 (not + * default) i.e. Large Tensor Support * * \param sym symbol handle * \param num_args number of input arguments. @@ -1911,23 +1838,24 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym, MXNET_DLL int MXSymbolInferShapePartial64(SymbolHandle sym, uint32_t num_args, const char** keys, - const int64_t *arg_ind_ptr, - const int64_t *arg_shape_data, - size_t *in_shape_size, - const int **in_shape_ndim, - const int64_t ***in_shape_data, - size_t *out_shape_size, - const int **out_shape_ndim, - const int64_t ***out_shape_data, - size_t *aux_shape_size, - const int **aux_shape_ndim, - const int64_t ***aux_shape_data, - int *complete); + const int64_t* arg_ind_ptr, + const int64_t* arg_shape_data, + size_t* in_shape_size, + const int** in_shape_ndim, + const int64_t*** in_shape_data, + size_t* out_shape_size, + const int** out_shape_ndim, + const int64_t*** out_shape_data, + size_t* aux_shape_size, + const int** aux_shape_ndim, + const int64_t*** aux_shape_data, + int* complete); /*! * \brief infer type of unknown input types given the known one. * The types are packed into a CSR matrix represented by arg_ind_ptr and arg_type_data - * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional. + * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is + * positional. * * \param sym symbol handle * \param num_args numbe of input arguments. @@ -1945,21 +1873,22 @@ MXNET_DLL int MXSymbolInferShapePartial64(SymbolHandle sym, MXNET_DLL int MXSymbolInferType(SymbolHandle sym, uint32_t num_args, const char** keys, - const int *arg_type_data, - uint32_t *in_type_size, - const int **in_type_data, - uint32_t *out_type_size, - const int **out_type_data, - uint32_t *aux_type_size, - const int **aux_type_data, - int *complete); + const int* arg_type_data, + uint32_t* in_type_size, + const int** in_type_data, + uint32_t* out_type_size, + const int** out_type_data, + uint32_t* aux_type_size, + const int** aux_type_data, + int* complete); /*! 
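
Unlike the shape variant, the type interface takes one dtype code per argument rather than a CSR matrix; a minimal sketch (dtype code 0 is assumed to mean float32, -1 marks an unknown type):

#include <mxnet/c_api.h>

static int infer_types(SymbolHandle sym) {
  /* positional: type of arg0 known, arg1 left for inference */
  const int arg_type_data[2] = {0, -1};
  uint32_t in_n = 0, out_n = 0, aux_n = 0;
  const int *in_t = NULL, *out_t = NULL, *aux_t = NULL;
  int complete = 0;
  return MXSymbolInferType(sym, 2, NULL, arg_type_data,
                           &in_n, &in_t, &out_n, &out_t,
                           &aux_n, &aux_t, &complete);
}

/*!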
 * \brief partially infer type of unknown input types given the known one.
 *
 *  Return partially inferred results if not all types could be inferred.
 *  The types are packed into a CSR matrix represented by arg_ind_ptr and arg_type_data
- * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is positional.
+ * The call will be treated as a kwargs call if key != NULL or num_args==0, otherwise it is
+ * positional.
 *
 * \param sym symbol handle
 * \param num_args number of input arguments.
@@ -1977,14 +1906,14 @@ MXNET_DLL int MXSymbolInferType(SymbolHandle sym,
 MXNET_DLL int MXSymbolInferTypePartial(SymbolHandle sym,
                                        uint32_t num_args,
                                        const char** keys,
-                                       const int *arg_type_data,
-                                       uint32_t *in_type_size,
-                                       const int **in_type_data,
-                                       uint32_t *out_type_size,
-                                       const int **out_type_data,
-                                       uint32_t *aux_type_size,
-                                       const int **aux_type_data,
-                                       int *complete);
+                                       const int* arg_type_data,
+                                       uint32_t* in_type_size,
+                                       const int** in_type_data,
+                                       uint32_t* out_type_size,
+                                       const int** out_type_data,
+                                       uint32_t* aux_type_size,
+                                       const int** aux_type_data,
+                                       int* complete);
 
 /*!
  * \brief Convert a symbol into a quantized symbol where FP32 operators are replaced with INT8
 * \param sym_handle symbol to be converted
 * \param dev_type device type
 * \param num_excluded_sym_names number of layers excluded from being quantized in the input symbol
 * \param excluded_sym_names node names to be excluded from being quantized
- * \param num_excluded_op_names number of operators excluded from being quantized in the input symbol
- * \param excluded_op_names operator names to be excluded from being quantized
- * \param num_offline number of parameters that are quantized offline
- * \param offline_params array of c strings representing the names of params quantized offline
- * \param quantized_dtype the quantized destination type for input data
- * \param calib_quantize **Deprecated**. quantize op will always be calibrated if could
- * \param quantize_mode quantize mode to be used in quantize pass
- * \param quantize_granularity quantize granularity, tensor-wise or channel-wise
- * \param out_num_calib_names return the number of nodes to be calibrated
- * \param out_calib_names return the node names to be calibrated
+ * \param num_excluded_op_names number of operators excluded from being quantized in the input
+ *        symbol
+ * \param excluded_op_names operator names to be excluded from being quantized
+ * \param num_offline number of parameters that are quantized offline
+ * \param offline_params array of c strings representing the names of params quantized offline
+ * \param quantized_dtype the quantized destination type for input data
+ * \param calib_quantize **Deprecated**. quantize op will always be calibrated if could
+ * \param quantize_mode quantize mode to be used in quantize pass
+ * \param quantize_granularity quantize granularity, tensor-wise or channel-wise
+ * \param out_num_calib_names return the number of nodes to be calibrated
+ * \param out_calib_names return the node names to be calibrated
  */
 MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
-                               SymbolHandle *ret_sym_handle,
+                               SymbolHandle* ret_sym_handle,
                                const int* dev_type,
                                const uint32_t num_excluded_sym_names,
-                               const char **excluded_sym_names,
+                               const char** excluded_sym_names,
                                const uint32_t num_excluded_op_names,
-                               const char **excluded_op_names,
-                               const uint32_t num_offline, const char **offline_params,
-                               const char *quantized_dtype, const bool calib_quantize,
-                               const char *quantize_mode, const char *quantize_granularity,
-                               uint32_t* out_num_calib_names, const char ***out_calib_names);
-
-/*!
- * \brief Convert a symbol into a mixed precision symbol with cast operators for target dtype casting
- * \param sym_handle symbol to be converted
- * \param ret_sym_handle mixed precision symbol result
- * \param num_args number of arguments for known dtypes
- * \param arg_type_data arg types of the arguments
- * \param target_dtype target_dtype for mixed precision symbol
- * \param cast_optional_params whether to cast optional params to target_dtype
- * \param num_target_dtype_op_names number of ops to be casted to target_dtype
- * \param num_fp32_op_names number of ops to be casted to FP32
- * \param num_widest_dtype_op_names number of ops to be casted to widest dtype
- * \param num_conditional_fp32_op_names number of ops to be casted to FP32 based on a condition
- * \param num_excluded_symbols number of symbols to be excluded from casting
- * \param num_model_params number of model parameters
- * \param num_widest_dtype_op_names number of ops to be casted to the widest dtype
- * \param num_conditional_fp32_op_names number of ops to be cast to fp32 based on precision
+                               const char** excluded_op_names,
+                               const uint32_t num_offline,
+                               const char** offline_params,
+                               const char* quantized_dtype,
+                               const bool calib_quantize,
+                               const char* quantize_mode,
+                               const char* quantize_granularity,
+                               uint32_t* out_num_calib_names,
+                               const char*** out_calib_names);
+
+/*!
+ * \brief Convert a symbol into a mixed precision symbol with cast operators for target dtype
+ *        casting
+ * \param sym_handle symbol to be converted
+ * \param ret_sym_handle mixed precision symbol result
+ * \param num_args number of arguments for known dtypes
+ * \param arg_type_data arg types of the arguments
+ * \param target_dtype target_dtype for mixed precision symbol
+ * \param cast_optional_params whether to cast optional params to target_dtype
+ * \param num_target_dtype_op_names number of ops to be casted to target_dtype
+ * \param num_fp32_op_names number of ops to be casted to FP32
+ * \param num_widest_dtype_op_names number of ops to be casted to the widest dtype
+ * \param num_conditional_fp32_op_names number of ops to be casted to FP32 based on a condition
+ * \param num_excluded_symbols number of symbols to be excluded from casting
+ * \param num_model_params number of model parameters
 * \param target_dtype_op_names op names to be casted to target_dtype
 * \param fp32_op_names op names to be casted to fp32
 * \param widest_dtype_op_names names to be casted to widest dtype
@@ -2043,7 +1971,7 @@ MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle,
 * \param model_param_names names for model parameters
 */
 MXNET_DLL int MXReducePrecisionSymbol(SymbolHandle sym_handle,
-                                      SymbolHandle *ret_sym_handle,
+                                      SymbolHandle* ret_sym_handle,
                                       uint32_t num_args,
                                       const int* arg_type_data,
                                       uint32_t num_ind_ptr,
@@ -2056,15 +1984,15 @@ MXNET_DLL int MXReducePrecisionSymbol(SymbolHandle sym_handle,
                                       const uint32_t num_conditional_fp32_op_names,
                                       const uint32_t num_excluded_symbols,
                                       const uint32_t num_model_params,
-                                      const char **target_dtype_op_names,
-                                      const char **fp32_op_names,
-                                      const char **widest_dtype_op_names,
-                                      const char **conditional_fp32_op_names,
-                                      const char **excluded_symbols,
-                                      const char **conditional_param_names,
-                                      const char **conditional_param_vals,
-                                      const char **model_param_names,
-                                      const char **arg_names);
+                                      const char** target_dtype_op_names,
+                                      const char** fp32_op_names,
+                                      const char** widest_dtype_op_names,
+                                      const char** conditional_fp32_op_names,
+                                      const char** excluded_symbols,
+                                      const char** conditional_param_names,
+                                      const char** conditional_param_vals,
+                                      const char** model_param_names,
+                                      const char** arg_names);
 /*!
  * \brief Set calibration table to node attributes in the sym
  * \param sym_handle symbol whose node attributes are to be set by calibration table
@@ -2087,15 +2015,16 @@ MXNET_DLL int MXSetCalibTableToQuantizedSymbol(SymbolHandle qsym_handle,
  * \param backend backend names for subgraph pass
  * \param ret_sym_handle returned symbol
  */
-MXNET_DLL int MXGenBackendSubgraph(SymbolHandle sym_handle, const char *backend,
-                                   SymbolHandle *ret_sym_handle);
+MXNET_DLL int MXGenBackendSubgraph(SymbolHandle sym_handle,
+                                   const char* backend,
+                                   SymbolHandle* ret_sym_handle);
 
 /*!
  * \brief Generate atomic symbol (able to be composed) from a source symbol
  * \param sym_handle source symbol
  * \param ret_sym_handle returned atomic symbol
 */
-MXNET_DLL int MXGenAtomicSymbolFromSymbol(SymbolHandle sym_handle, SymbolHandle *ret_sym_handle);
+MXNET_DLL int MXGenAtomicSymbolFromSymbol(SymbolHandle sym_handle, SymbolHandle* ret_sym_handle);
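
A minimal sketch of running a registered subgraph pass with MXGenBackendSubgraph; the backend name "ONEDNN" is only an assumed example and must match a backend compiled into the library:

#include <mxnet/c_api.h>

static int partition(SymbolHandle sym, SymbolHandle* out) {
  /* "ONEDNN" is an assumed backend name; a failing lookup reports via MXGetLastError */
  return MXGenBackendSubgraph(sym, "ONEDNN", out);
}
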
* \brief Partitions symbol for given backend, potentially creating subgraphs * \param sym_handle symbol to be partitioned @@ -2156,7 +2085,6 @@ MXNET_DLL int MXOptimizeForBackend(SymbolHandle sym_handle, NDArrayHandle** new_aux_handle, char*** new_aux_names_handle); - //-------------------------------------------- // Part 5: IO Interface //-------------------------------------------- @@ -2166,8 +2094,7 @@ MXNET_DLL int MXOptimizeForBackend(SymbolHandle sym_handle, * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXListDataIters(uint32_t *out_size, - DataIterCreator **out_array); +MXNET_DLL int MXListDataIters(uint32_t* out_size, DataIterCreator** out_array); /*! * \brief Init an iterator, init with parameters * the array size of passed in arguments @@ -2180,9 +2107,9 @@ MXNET_DLL int MXListDataIters(uint32_t *out_size, */ MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, uint32_t num_param, - const char **keys, - const char **vals, - DataIterHandle *out); + const char** keys, + const char** vals, + DataIterHandle* out); /*! * \brief Get the detailed information about data iterator. * \param creator the DataIterCreator. @@ -2195,12 +2122,12 @@ MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterGetIterInfo(DataIterCreator creator, - const char **name, - const char **description, - uint32_t *num_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions); + const char** name, + const char** description, + uint32_t* num_args, + const char*** arg_names, + const char*** arg_type_infos, + const char*** arg_descriptions); /*! * \brief Free the handle to the IO module * \param handle the handle pointer to the data iterator @@ -2213,8 +2140,7 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); * \param out return value of next * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterNext(DataIterHandle handle, - int *out); +MXNET_DLL int MXDataIterNext(DataIterHandle handle, int* out); /*! * \brief Call iterator.Reset * \param handle the handle to iterator @@ -2227,16 +2153,14 @@ MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetLenHint(DataIterHandle handle, - int64_t *len); +MXNET_DLL int MXDataIterGetLenHint(DataIterHandle handle, int64_t* len); /*! * \brief Get the handle to the NDArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NDArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetData(DataIterHandle handle, - NDArrayHandle *out); +MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NDArrayHandle* out); /*! * \brief Get the image index by array. * \param handle the handle pointer to the data iterator @@ -2244,17 +2168,14 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, * \param out_size output size of the array. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetIndex(DataIterHandle handle, - uint64_t **out_index, - uint64_t *out_size); +MXNET_DLL int MXDataIterGetIndex(DataIterHandle handle, uint64_t** out_index, uint64_t* out_size); /*! 
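The data-iterator entry points above compose as in the following sketch; the creator index 0 and the "batch_size" parameter are assumptions for illustration, not something this header prescribes.

#include <mxnet/c_api.h>

// Sketch: create the first registered iterator and drain it (no error handling).
void DrainFirstIter() {
  uint32_t num_iters = 0;
  DataIterCreator* creators = nullptr;
  if (MXListDataIters(&num_iters, &creators) != 0 || num_iters == 0)
    return;

  const char* keys[] = {"batch_size"};  // illustrative iterator parameter
  const char* vals[] = {"32"};
  DataIterHandle iter = nullptr;
  if (MXDataIterCreateIter(creators[0], 1, keys, vals, &iter) != 0)
    return;

  MXDataIterBeforeFirst(iter);  // rewind, i.e. iterator.Reset
  int has_next = 0;
  while (MXDataIterNext(iter, &has_next) == 0 && has_next) {
    NDArrayHandle data = nullptr;
    MXDataIterGetData(iter, &data);  // NDArray holding the current batch
  }
  MXDataIterFree(iter);
}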
* \brief Get the padding number in current data batch * \param handle the handle pointer to the data iterator * \param pad pad number ptr * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetPadNum(DataIterHandle handle, - int *pad); +MXNET_DLL int MXDataIterGetPadNum(DataIterHandle handle, int* pad); /*! * \brief Get the handle to the NDArray of underlying label @@ -2262,8 +2183,7 @@ MXNET_DLL int MXDataIterGetPadNum(DataIterHandle handle, * \param out the handle to underlying label NDArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, - NDArrayHandle *out); +MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NDArrayHandle* out); /*! * \brief Get the handles to specified underlying ndarrays of index * \param handle the handle pointer to the data iterator @@ -2271,9 +2191,7 @@ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, * \param out the handle to an array of NDArrays that stores pointers to handles * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDataIterGetItems(DataIterHandle handle, - int* num_outputs, - NDArrayHandle **outputs); +MXNET_DLL int MXDataIterGetItems(DataIterHandle handle, int* num_outputs, NDArrayHandle** outputs); /*! * \brief List all the available dataset entries @@ -2281,8 +2199,7 @@ MXNET_DLL int MXDataIterGetItems(DataIterHandle handle, * \param out_array the output dataset entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXListDatasets(uint32_t *out_size, - DatasetCreator **out_array); +MXNET_DLL int MXListDatasets(uint32_t* out_size, DatasetCreator** out_array); /*! * \brief Init an dataset, init with parameters * the array size of passed in arguments @@ -2295,9 +2212,9 @@ MXNET_DLL int MXListDatasets(uint32_t *out_size, */ MXNET_DLL int MXDatasetCreateDataset(DatasetCreator handle, uint32_t num_param, - const char **keys, - const char **vals, - DatasetHandle *out); + const char** keys, + const char** vals, + DatasetHandle* out); /*! * \brief Get the detailed information about dataset. * \param creator the DatasetCreator. @@ -2310,12 +2227,12 @@ MXNET_DLL int MXDatasetCreateDataset(DatasetCreator handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDatasetGetDatasetInfo(DatasetCreator creator, - const char **name, - const char **description, - uint32_t *num_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions); + const char** name, + const char** description, + uint32_t* num_args, + const char*** arg_names, + const char*** arg_type_infos, + const char*** arg_descriptions); /*! * \brief Free the handle to the IO module * \param handle the handle pointer to the dataset @@ -2328,8 +2245,7 @@ MXNET_DLL int MXDatasetFree(DatasetHandle handle); * \param out return value of GetLen * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXDatasetGetLen(DatasetHandle handle, - uint64_t *out); +MXNET_DLL int MXDatasetGetLen(DatasetHandle handle, uint64_t* out); /*! * \brief Get Output NDArray given specified indices * \param handle the handle to dataset @@ -2342,7 +2258,7 @@ MXNET_DLL int MXDatasetGetLen(DatasetHandle handle, MXNET_DLL int MXDatasetGetItems(DatasetHandle handle, uint64_t index, int* num_outputs, - NDArrayHandle **outputs); + NDArrayHandle** outputs); /*! 
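The dataset entry points mirror the iterator ones; a random-access walk looks roughly like this, with the creator choice and the empty parameter list as illustrative assumptions.

#include <mxnet/c_api.h>

// Sketch: open the first registered dataset and touch every item by index.
void WalkFirstDataset() {
  uint32_t num = 0;
  DatasetCreator* creators = nullptr;
  if (MXListDatasets(&num, &creators) != 0 || num == 0)
    return;

  DatasetHandle ds = nullptr;
  if (MXDatasetCreateDataset(creators[0], 0, nullptr, nullptr, &ds) != 0)
    return;

  uint64_t len = 0;
  MXDatasetGetLen(ds, &len);
  for (uint64_t i = 0; i < len; ++i) {
    int num_outputs = 0;
    NDArrayHandle* outputs = nullptr;
    MXDatasetGetItems(ds, i, &num_outputs, &outputs);  // arrays for item i
  }
  MXDatasetFree(ds);
}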
* \brief List all the available batchify function entries @@ -2350,8 +2266,7 @@ MXNET_DLL int MXDatasetGetItems(DatasetHandle handle, * \param out_array the output batchify function entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXListBatchifyFunctions(uint32_t *out_size, - BatchifyFunctionCreator **out_array); +MXNET_DLL int MXListBatchifyFunctions(uint32_t* out_size, BatchifyFunctionCreator** out_array); /*! * \brief Init an batchify function, init with parameters * the array size of passed in arguments @@ -2363,10 +2278,10 @@ MXNET_DLL int MXListBatchifyFunctions(uint32_t *out_size, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXBatchifyFunctionCreateFunction(BatchifyFunctionCreator handle, - uint32_t num_param, - const char **keys, - const char **vals, - BatchifyFunctionHandle *out); + uint32_t num_param, + const char** keys, + const char** vals, + BatchifyFunctionHandle* out); /*! * \brief Get the detailed information about batchify function. * \param creator the batchifyFunctionCreator. @@ -2379,12 +2294,12 @@ MXNET_DLL int MXBatchifyFunctionCreateFunction(BatchifyFunctionCreator handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXBatchifyFunctionGetFunctionInfo(BatchifyFunctionCreator creator, - const char **name, - const char **description, - uint32_t *num_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions); + const char** name, + const char** description, + uint32_t* num_args, + const char*** arg_names, + const char*** arg_type_infos, + const char*** arg_descriptions); /*! * \brief Invoke the Batchify Function * \param handle the handle pointer to the batchify function @@ -2393,12 +2308,12 @@ MXNET_DLL int MXBatchifyFunctionGetFunctionInfo(BatchifyFunctionCreator creator, * \param inputs the pointers to input ndarrays * \param ouptuts the pointers to output ndarrays * \return 0 when success, -1 when failure happens - */ + */ MXNET_DLL int MXBatchifyFunctionInvoke(BatchifyFunctionHandle handle, int batch_size, int num_output, - NDArrayHandle *inputs, - NDArrayHandle **outputs); + NDArrayHandle* inputs, + NDArrayHandle** outputs); /*! * \brief Free the handle to the IO module * \param handle the handle pointer to the batchify function @@ -2414,10 +2329,7 @@ MXNET_DLL int MXBatchifyFunctionFree(BatchifyFunctionHandle handle); * \param keys environment keys * \param vals environment values */ -MXNET_DLL int MXInitPSEnv(uint32_t num_vars, - const char **keys, - const char **vals); - +MXNET_DLL int MXInitPSEnv(uint32_t num_vars, const char** keys, const char** vals); /*! * \brief Create a kvstore @@ -2425,8 +2337,7 @@ MXNET_DLL int MXInitPSEnv(uint32_t num_vars, * \param out The output type of KVStore * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreCreate(const char *type, - KVStoreHandle *out); +MXNET_DLL int MXKVStoreCreate(const char* type, KVStoreHandle* out); /*! * \brief Set parameters to use low-bit compressed gradients @@ -2690,10 +2601,7 @@ MXNET_DLL int MXKVStorePushPullEx(KVStoreHandle handle, * \param local the value stored on local on this key * \param handle The additional handle to the updater */ -typedef void (MXKVStoreUpdater)(int key, - NDArrayHandle recv, - NDArrayHandle local, - void *handle); +typedef void(MXKVStoreUpdater)(int key, NDArrayHandle recv, NDArrayHandle local, void* handle); /*! 
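The MXKVStoreUpdater typedef above is satisfied by a plain function; a stub matching its signature, plus store creation, might look as follows. What the updater computes is application-defined, and registration happens through MXKVStoreSetUpdater, declared just below.

#include <mxnet/c_api.h>

// Sketch: an updater with the exact MXKVStoreUpdater signature. A real one
// would merge recv into local, e.g. an SGD step via NDArray C API calls.
void MyUpdater(int key, NDArrayHandle recv, NDArrayHandle local, void* handle) {
  (void)key; (void)recv; (void)local; (void)handle;
}

int MakeLocalStore(KVStoreHandle* out) {
  return MXKVStoreCreate("local", out);  // "local" is the single-process store type
}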
* \brief user-defined updater for the kvstore with string keys * It's this updater's responsibility to delete \a recv and \a local @@ -2702,10 +2610,10 @@ typedef void (MXKVStoreUpdater)(int key, * \param local the value stored on local on this key * \param handle The additional handle to the updater */ -typedef void (MXKVStoreStrUpdater)(const char* key, - NDArrayHandle recv, - NDArrayHandle local, - void *handle); +typedef void(MXKVStoreStrUpdater)(const char* key, + NDArrayHandle recv, + NDArrayHandle local, + void* handle); /*! * \brief register a push updater * \param handle handle to the KVStore @@ -2715,7 +2623,7 @@ typedef void (MXKVStoreStrUpdater)(const char* key, */ MXNET_DLL int MXKVStoreSetUpdater(KVStoreHandle handle, MXKVStoreUpdater updater, - void *updater_handle); + void* updater_handle); /*! * \brief register a push updater with int keys and one with string keys * \param handle handle to the KVStore @@ -2727,15 +2635,14 @@ MXNET_DLL int MXKVStoreSetUpdater(KVStoreHandle handle, MXNET_DLL int MXKVStoreSetUpdaterEx(KVStoreHandle handle, MXKVStoreUpdater updater, MXKVStoreStrUpdater str_updater, - void *updater_handle); + void* updater_handle); /*! * \brief get the type of the kvstore * \param handle handle to the KVStore * \param type a string type * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreGetType(KVStoreHandle handle, - const char** type); +MXNET_DLL int MXKVStoreGetType(KVStoreHandle handle, const char** type); //-------------------------------------------- // Part 6: advanced KVStore for multi-machines //-------------------------------------------- @@ -2747,8 +2654,7 @@ MXNET_DLL int MXKVStoreGetType(KVStoreHandle handle, * \param ret the node rank * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreGetRank(KVStoreHandle handle, - int *ret); +MXNET_DLL int MXKVStoreGetRank(KVStoreHandle handle, int* ret); /** * \brief return The number of nodes in this group, which is @@ -2759,31 +2665,28 @@ MXNET_DLL int MXKVStoreGetRank(KVStoreHandle handle, * \param ret the group size * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreGetGroupSize(KVStoreHandle handle, - int *ret); +MXNET_DLL int MXKVStoreGetGroupSize(KVStoreHandle handle, int* ret); /** * \brief return whether or not this process is a worker node. * \param ret 1 for yes, 0 for no * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreIsWorkerNode(int *ret); - +MXNET_DLL int MXKVStoreIsWorkerNode(int* ret); /** * \brief return whether or not this process is a server node. * \param ret 1 for yes, 0 for no * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreIsServerNode(int *ret); - +MXNET_DLL int MXKVStoreIsServerNode(int* ret); /** * \brief return whether or not this process is a scheduler node. 
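Taken together, the query functions above let a process discover its role in a distributed job; a sketch with return codes ignored for brevity:

#include <mxnet/c_api.h>
#include <cstdio>

// Sketch: print whether this process is a worker and its rank within the group.
void PrintNodeRole(KVStoreHandle kv) {
  int is_worker = 0, rank = 0, size = 0;
  MXKVStoreIsWorkerNode(&is_worker);
  MXKVStoreGetRank(kv, &rank);
  MXKVStoreGetGroupSize(kv, &size);
  std::printf("worker=%d rank=%d of %d\n", is_worker, rank, size);
}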
* \param ret 1 for yes, 0 for no * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreIsSchedulerNode(int *ret); +MXNET_DLL int MXKVStoreIsSchedulerNode(int* ret); /** * \brief global barrier among all worker machines @@ -2800,8 +2703,7 @@ MXNET_DLL int MXKVStoreBarrier(KVStoreHandle handle); * \param barrier_before_exit whether to do barrier when kvstore finalize * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXKVStoreSetBarrierBeforeExit(KVStoreHandle handle, - const int barrier_before_exit); +MXNET_DLL int MXKVStoreSetBarrierBeforeExit(KVStoreHandle handle, const int barrier_before_exit); /** * \brief the prototype of a server controller @@ -2809,9 +2711,7 @@ MXNET_DLL int MXKVStoreSetBarrierBeforeExit(KVStoreHandle handle, * \param body the body of the command * \param controller_handle helper handle for implementing controller */ -typedef void (MXKVStoreServerController)(int head, - const char *body, - void *controller_handle); +typedef void(MXKVStoreServerController)(int head, const char* body, void* controller_handle); /** * \brief Run as server (or scheduler) @@ -2822,7 +2722,7 @@ typedef void (MXKVStoreServerController)(int head, */ MXNET_DLL int MXKVStoreRunServer(KVStoreHandle handle, MXKVStoreServerController controller, - void *controller_handle); + void* controller_handle); /** * \brief Send a command to all server nodes @@ -2847,7 +2747,7 @@ MXNET_DLL int MXKVStoreSendCommmandToServers(KVStoreHandle handle, */ MXNET_DLL int MXKVStoreGetNumDeadNode(KVStoreHandle handle, const int node_id, - int *number, + int* number, const int timeout_sec DEFAULT(60)); /** @@ -2855,14 +2755,14 @@ MXNET_DLL int MXKVStoreGetNumDeadNode(KVStoreHandle handle, * \param uri path to file * \param out handle pointer to the created object * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOWriterCreate(const char *uri, RecordIOHandle *out); + */ +MXNET_DLL int MXRecordIOWriterCreate(const char* uri, RecordIOHandle* out); /** * \brief Delete a RecordIO writer object * \param handle handle to RecordIO object * \return 0 when success, -1 when failure happens -*/ + */ MXNET_DLL int MXRecordIOWriterFree(RecordIOHandle handle); /** @@ -2871,31 +2771,30 @@ MXNET_DLL int MXRecordIOWriterFree(RecordIOHandle handle); * \param buf buffer to write * \param size size of buffer * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOWriterWriteRecord(RecordIOHandle handle, - const char *buf, size_t size); + */ +MXNET_DLL int MXRecordIOWriterWriteRecord(RecordIOHandle handle, const char* buf, size_t size); /** * \brief Get the current writer pointer position * \param handle handle to RecordIO object * \param pos handle to output position * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOWriterTell(RecordIOHandle handle, size_t *pos); + */ +MXNET_DLL int MXRecordIOWriterTell(RecordIOHandle handle, size_t* pos); /** * \brief Create a RecordIO reader object * \param uri path to file * \param out handle pointer to the created object * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOReaderCreate(const char *uri, RecordIOHandle *out); + */ +MXNET_DLL int MXRecordIOReaderCreate(const char* uri, RecordIOHandle* out); /** * \brief Delete a RecordIO reader object * \param handle handle to RecordIO object * \return 0 when success, -1 when failure happens -*/ + */ MXNET_DLL int MXRecordIOReaderFree(RecordIOHandle handle); /** @@ -2904,16 +2803,15 @@ MXNET_DLL int 
MXRecordIOReaderFree(RecordIOHandle handle); * \param buf pointer to return buffer * \param size point to size of buffer * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOReaderReadRecord(RecordIOHandle handle, - char const **buf, size_t *size); + */ +MXNET_DLL int MXRecordIOReaderReadRecord(RecordIOHandle handle, char const** buf, size_t* size); /** * \brief Set the current reader pointer position * \param handle handle to RecordIO object * \param pos target position * \return 0 when success, -1 when failure happens -*/ + */ MXNET_DLL int MXRecordIOReaderSeek(RecordIOHandle handle, size_t pos); /** @@ -2921,22 +2819,30 @@ MXNET_DLL int MXRecordIOReaderSeek(RecordIOHandle handle, size_t pos); * \param handle handle to RecordIO object * \param pos handle to output position * \return 0 when success, -1 when failure happens -*/ -MXNET_DLL int MXRecordIOReaderTell(RecordIOHandle handle, size_t *pos); + */ +MXNET_DLL int MXRecordIOReaderTell(RecordIOHandle handle, size_t* pos); /** * \brief Create a MXRtc object -*/ -MXNET_DLL int MXRtcCreate(char* name, uint32_t num_input, uint32_t num_output, - char** input_names, char** output_names, - NDArrayHandle* inputs, NDArrayHandle* outputs, - char* kernel, RtcHandle *out); + */ +MXNET_DLL int MXRtcCreate(char* name, + uint32_t num_input, + uint32_t num_output, + char** input_names, + char** output_names, + NDArrayHandle* inputs, + NDArrayHandle* outputs, + char* kernel, + RtcHandle* out); /** * \brief Run cuda kernel -*/ -MXNET_DLL int MXRtcPush(RtcHandle handle, uint32_t num_input, uint32_t num_output, - NDArrayHandle* inputs, NDArrayHandle* outputs, + */ +MXNET_DLL int MXRtcPush(RtcHandle handle, + uint32_t num_input, + uint32_t num_output, + NDArrayHandle* inputs, + NDArrayHandle* outputs, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, @@ -2946,7 +2852,7 @@ MXNET_DLL int MXRtcPush(RtcHandle handle, uint32_t num_input, uint32_t num_outpu /** * \brief Delete a MXRtc object -*/ + */ MXNET_DLL int MXRtcFree(RtcHandle handle); /* * \brief register custom operators from frontend. @@ -2962,9 +2868,11 @@ MXNET_DLL int MXCustomOpRegister(const char* op_type, CustomOpPropCreator creato * \param outputs handle to output NDArrays. * \param callbacks callbacks for backward function. 
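The RecordIO writer/reader pairs above form a simple round trip; the path below is a placeholder and failures are reduced to early returns. Note that the buffer returned by MXRecordIOReaderReadRecord stays owned by the reader.

#include <mxnet/c_api.h>

// Sketch: write one record, then read it back.
void RecordIORoundTrip() {
  RecordIOHandle writer = nullptr;
  if (MXRecordIOWriterCreate("/tmp/example.rec", &writer) != 0)
    return;
  const char payload[] = "hello";
  MXRecordIOWriterWriteRecord(writer, payload, sizeof(payload));
  MXRecordIOWriterFree(writer);

  RecordIOHandle reader = nullptr;
  if (MXRecordIOReaderCreate("/tmp/example.rec", &reader) != 0)
    return;
  const char* buf = nullptr;
  size_t size = 0;
  MXRecordIOReaderReadRecord(reader, &buf, &size);  // buf owned by the reader
  MXRecordIOReaderFree(reader);
}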
*/ -MXNET_DLL int MXCustomFunctionRecord(int num_inputs, NDArrayHandle *inputs, - int num_outputs, NDArrayHandle *outputs, - struct MXCallbackList *callbacks); +MXNET_DLL int MXCustomFunctionRecord(int num_inputs, + NDArrayHandle* inputs, + int num_outputs, + NDArrayHandle* outputs, + struct MXCallbackList* callbacks); /* * \brief create cuda rtc module * \param source cuda source code @@ -2974,9 +2882,12 @@ MXNET_DLL int MXCustomFunctionRecord(int num_inputs, NDArrayHandle *inputs, * \param exported function names * \param out handle to created module */ -MXNET_DLL int MXRtcCudaModuleCreate(const char* source, int num_options, - const char** options, int num_exports, - const char** exports, CudaModuleHandle *out); +MXNET_DLL int MXRtcCudaModuleCreate(const char* source, + int num_options, + const char** options, + int num_exports, + const char** exports, + CudaModuleHandle* out); /* * \brief delete cuda rtc module * \param handle handle to cuda module @@ -2992,9 +2903,13 @@ MXNET_DLL int MXRtcCudaModuleFree(CudaModuleHandle handle); * \param arg_types data type of arguments * \param out created kernel */ -MXNET_DLL int MXRtcCudaKernelCreate(CudaModuleHandle handle, const char* name, - int num_args, int* is_ndarray, int* is_const, - int* arg_types, CudaKernelHandle *out); +MXNET_DLL int MXRtcCudaKernelCreate(CudaModuleHandle handle, + const char* name, + int num_args, + int* is_ndarray, + int* is_const, + int* arg_types, + CudaKernelHandle* out); /* * \brief delete kernel * \param handle handle to previously created kernel @@ -3013,10 +2928,15 @@ MXNET_DLL int MXRtcCudaKernelFree(CudaKernelHandle handle); * \param block_dim_z block dimension z * \param shared_mem size of dynamically allocated shared memory */ -MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** args, - uint32_t grid_dim_x, uint32_t grid_dim_y, - uint32_t grid_dim_z, uint32_t block_dim_x, - uint32_t block_dim_y, uint32_t block_dim_z, +MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, + int dev_id, + void** args, + uint32_t grid_dim_x, + uint32_t grid_dim_y, + uint32_t grid_dim_z, + uint32_t block_dim_x, + uint32_t block_dim_y, + uint32_t block_dim_z, uint32_t shared_mem); /*! * \brief Get shared memory handle from NDArray @@ -3024,8 +2944,7 @@ MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** ar * \param shared_pid output PID * \param shared_id output shared memory id. */ -MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, - int* shared_id); +MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shared_id); /*! * \brief Release all unreferenced memory from the devices storage managers memory pool @@ -3043,55 +2962,69 @@ MXNET_DLL int MXStorageEmptyCache(int dev_type, int dev_id); * \param dtype data type of NDArray * \param out constructed NDArray */ -MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, - int ndim, int dtype, NDArrayHandle *out); - -/*! - * \brief Push an asynchronous operation to the engine. - * \param async_func Execution function whici takes a parameter on_complete - * that must be called when the execution ompletes. - * \param func_param The parameter set on calling async_func, can be NULL. - * \param deleter The callback to free func_param, can be NULL. - * \param ctx_handle Execution context. - * \param const_vars_handle The variables that current operation will use - * but not mutate. 
- * \param num_const_vars The number of const_vars_handle. - * \param mutable_vars_handle The variables that current operation will mutate. - * \param num_mutable_vars The number of mutable_vars_handle. - * \param prop_handle Property of the function. - * \param priority Priority of the action, as hint to the engine. - * \param opr_name The operation name. - * \param wait Whether this is a WaitForVar operation. - */ -MXNET_DLL int MXEnginePushAsync(EngineAsyncFunc async_func, void* func_param, - EngineFuncParamDeleter deleter, ContextHandle ctx_handle, - EngineVarHandle const_vars_handle, int num_const_vars, - EngineVarHandle mutable_vars_handle, int num_mutable_vars, +MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, + int shared_id, + const int* shape, + int ndim, + int dtype, + NDArrayHandle* out); + +/*! + * \brief Push an asynchronous operation to the engine. + * \param async_func Execution function which takes a parameter on_complete + * that must be called when the execution completes. + * \param func_param The parameter set on calling async_func, can be NULL. + * \param deleter The callback to free func_param, can be NULL. + * \param ctx_handle Execution context. + * \param const_vars_handle The variables that current operation will use + * but not mutate. + * \param num_const_vars The number of const_vars_handle. + * \param mutable_vars_handle The variables that current operation will mutate. + * \param num_mutable_vars The number of mutable_vars_handle. + * \param prop_handle Property of the function. + * \param priority Priority of the action, as hint to the engine. + * \param opr_name The operation name. + * \param wait Whether this is a WaitForVar operation. + */ +MXNET_DLL int MXEnginePushAsync(EngineAsyncFunc async_func, + void* func_param, + EngineFuncParamDeleter deleter, + ContextHandle ctx_handle, + EngineVarHandle const_vars_handle, + int num_const_vars, + EngineVarHandle mutable_vars_handle, + int num_mutable_vars, EngineFnPropertyHandle prop_handle DEFAULT(NULL), - int priority DEFAULT(0), const char* opr_name DEFAULT(NULL), + int priority DEFAULT(0), + const char* opr_name DEFAULT(NULL), bool wait DEFAULT(false)); /*! - * \brief Push a synchronous operation to the engine. - * \param sync_func Execution function that executes the operation. - * \param func_param The parameter set on calling sync_func, can be NULL. - * \param deleter The callback to free func_param, can be NULL. - * \param ctx_handle Execution context.
+ * \param const_vars_handle The variables that current operation will use + * but not mutate. + * \param num_const_vars The number of const_vars_handle. + * \param mutable_vars_handle The variables that current operation will mutate. + * \param num_mutable_vars The number of mutable_vars_handle. + * \param prop_handle Property of the function. + * \param priority Priority of the action, as hint to the engine. + * \param opr_name The operation name. + */ +MXNET_DLL int MXEnginePushSync(EngineSyncFunc sync_func, + void* func_param, + EngineFuncParamDeleter deleter, + ContextHandle ctx_handle, + EngineVarHandle const_vars_handle, + int num_const_vars, + EngineVarHandle mutable_vars_handle, + int num_mutable_vars, EngineFnPropertyHandle prop_handle DEFAULT(NULL), - int priority DEFAULT(0), const char* opr_name DEFAULT(NULL)); + int priority DEFAULT(0), + const char* opr_name DEFAULT(NULL)); /*! * \brief Create an NDArray from source sharing the same data chunk. * \param src source NDArray @@ -3103,84 +3036,93 @@ MXNET_DLL int MXShallowCopyNDArray(NDArrayHandle src, NDArrayHandle* out); * \param src source Symbol * \param out new Symbol sharing the same graph structure with src */ -MXNET_DLL int MXShallowCopySymbol(SymbolHandle src, SymbolHandle * out); - -/*! - * \brief Push an asynchronous operation to the engine. - * \param async_func Execution function whici takes a parameter on_complete - * that must be called when the execution ompletes. - * \param func_param The parameter set on calling async_func, can be NULL. - * \param deleter The callback to free func_param, can be NULL. - * \param ctx_handle Execution context. - * \param const_nds_handle The NDArrays that current operation will use - * but not mutate. - * \param num_const_nds The number of const_nds_handle. - * \param mutable_nds_handle The NDArrays that current operation will mutate. - * \param num_mutable_nds The number of mutable_nds_handle. - * \param prop_handle Property of the function. - * \param priority Priority of the action, as hint to the engine. - * \param opr_name The operation name. - * \param wait Whether this is a WaitForVar operation. - */ -MXNET_DLL int MXEnginePushAsyncND(EngineAsyncFunc async_func, void* func_param, - EngineFuncParamDeleter deleter, ContextHandle ctx_handle, - NDArrayHandle* const_nds_handle, int num_const_nds, - NDArrayHandle* mutable_nds_handle, int num_mutable_nds, +MXNET_DLL int MXShallowCopySymbol(SymbolHandle src, SymbolHandle* out); + +/*! + * \brief Push an asynchronous operation to the engine. + * \param async_func Execution function which takes a parameter on_complete + * that must be called when the execution completes. + * \param func_param The parameter set on calling async_func, can be NULL. + * \param deleter The callback to free func_param, can be NULL. + * \param ctx_handle Execution context. + * \param const_nds_handle The NDArrays that current operation will use + * but not mutate. + * \param num_const_nds The number of const_nds_handle. + * \param mutable_nds_handle The NDArrays that current operation will mutate. + * \param num_mutable_nds The number of mutable_nds_handle. + * \param prop_handle Property of the function. + * \param priority Priority of the action, as hint to the engine. + * \param opr_name The operation name. + * \param wait Whether this is a WaitForVar operation.
+ */ +MXNET_DLL int MXEnginePushAsyncND(EngineAsyncFunc async_func, + void* func_param, + EngineFuncParamDeleter deleter, + ContextHandle ctx_handle, + NDArrayHandle* const_nds_handle, + int num_const_nds, + NDArrayHandle* mutable_nds_handle, + int num_mutable_nds, EngineFnPropertyHandle prop_handle DEFAULT(NULL), - int priority DEFAULT(0), const char* opr_name DEFAULT(NULL), + int priority DEFAULT(0), + const char* opr_name DEFAULT(NULL), bool wait DEFAULT(false)); /*! - * \brief Push a synchronous operation to the engine. - * \param sync_func Execution function that executes the operation. - * \param func_param The parameter set on calling sync_func, can be NULL. - * \param deleter The callback to free func_param, can be NULL. - * \param ctx_handle Execution context. - * \param const_nds_handle The NDArrays that current operation will use - * but not mutate. - * \param num_const_nds The number of const_nds_handle. - * \param mutable_nds_handle The NDArrays that current operation will mutate. - * \param num_mutable_nds The number of mutable_nds_handle. - * \param prop_handle Property of the function. - * \param priority Priority of the action, as hint to the engine. - * \param opr_name The operation name. - */ -MXNET_DLL int MXEnginePushSyncND(EngineSyncFunc sync_func, void* func_param, - EngineFuncParamDeleter deleter, ContextHandle ctx_handle, - NDArrayHandle* const_nds_handle, int num_const_nds, - NDArrayHandle* mutable_nds_handle, int num_mutable_nds, + * \brief Push a synchronous operation to the engine. + * \param sync_func Execution function that executes the operation. + * \param func_param The parameter set on calling sync_func, can be NULL. + * \param deleter The callback to free func_param, can be NULL. + * \param ctx_handle Execution context. + * \param const_nds_handle The NDArrays that current operation will use + * but not mutate. + * \param num_const_nds The number of const_nds_handle. + * \param mutable_nds_handle The NDArrays that current operation will mutate. + * \param num_mutable_nds The number of mutable_nds_handle. + * \param prop_handle Property of the function. + * \param priority Priority of the action, as hint to the engine. + * \param opr_name The operation name. + */ +MXNET_DLL int MXEnginePushSyncND(EngineSyncFunc sync_func, + void* func_param, + EngineFuncParamDeleter deleter, + ContextHandle ctx_handle, + NDArrayHandle* const_nds_handle, + int num_const_nds, + NDArrayHandle* mutable_nds_handle, + int num_mutable_nds, EngineFnPropertyHandle prop_handle DEFAULT(NULL), - int priority DEFAULT(0), const char* opr_name DEFAULT(NULL)); + int priority DEFAULT(0), + const char* opr_name DEFAULT(NULL)); /*! * \brief This function checks if any dynamic shape op is present in the symbol. * \param sym_handle handler of the input symbol. * \param has_dynamic_shape Flag to indicate if the symbol contains dynamic shape op. */ -MXNET_DLL int MXCheckDynamicShapeOp(SymbolHandle sym_handle, - bool* has_dynamic_shape); +MXNET_DLL int MXCheckDynamicShapeOp(SymbolHandle sym_handle, bool* has_dynamic_shape); /*! - * \brief Push a new NVTX range. Requires building with CUDA and NVTX. - * \param name Name of the range. - * \param color Color used to display the range in the visual profiling tools. - * Encoded as 256*256*R + 256*G + B. - */ -MXNET_DLL int MXNVTXRangePush(const char * name, mx_uint color); + * \brief Push a new NVTX range. Requires building with CUDA and NVTX. + * \param name Name of the range. 
+ * \param color Color used to display the range in the visual profiling tools. + * Encoded as 256*256*R + 256*G + B. + */ +MXNET_DLL int MXNVTXRangePush(const char* name, mx_uint color); /*! - * \brief End the NVTX range. Requires building with CUDA and NVTX. - */ + * \brief End the NVTX range. Requires building with CUDA and NVTX. + */ MXNET_DLL int MXNVTXRangePop(); /*! - * \brief Start CUDA profiling session. Requires building with CUDA and NVTX. - */ + * \brief Start CUDA profiling session. Requires building with CUDA and NVTX. + */ MXNET_DLL int MXCUDAProfilerStart(); /*! - * \brief End CUDA profiling session. Requires building with CUDA and NVTX. - */ + * \brief End CUDA profiling session. Requires building with CUDA and NVTX. + */ MXNET_DLL int MXCUDAProfilerStop(); #ifdef __cplusplus diff --git a/include/mxnet/c_api_error.h b/include/mxnet/c_api_error.h index 2aa3a23887b3..e3cfb8381355 100644 --- a/include/mxnet/c_api_error.h +++ b/include/mxnet/c_api_error.h @@ -36,26 +36,26 @@ * and finishes with API_END() or API_END_HANDLE_ERROR() * The finally clause contains procedure to cleanup states when an error happens. */ -#define MX_API_BEGIN() \ - try { \ +#define MX_API_BEGIN() \ + try { \ on_enter_api(__FUNCTION__); -#define MX_API_END() \ - } \ - catch (const std::exception &_except_) { \ - on_exit_api(); \ - return MXAPIHandleException(_except_); \ - } \ - on_exit_api(); \ - return 0; // NOLINT(*) -#define MX_API_END_HANDLE_ERROR(Finalize) \ - } \ - catch (const std::exception &_except_) { \ - Finalize; \ - on_exit_api(); \ - return MXAPIHandleException(_except_); \ - } \ - on_exit_api(); \ - return 0; // NOLINT(*) +#define MX_API_END() \ + } \ + catch (const std::exception& _except_) { \ + on_exit_api(); \ + return MXAPIHandleException(_except_); \ + } \ + on_exit_api(); \ + return 0; // NOLINT(*) +#define MX_API_END_HANDLE_ERROR(Finalize) \ + } \ + catch (const std::exception& _except_) { \ + Finalize; \ + on_exit_api(); \ + return MXAPIHandleException(_except_); \ + } \ + on_exit_api(); \ + return 0; // NOLINT(*) /*! * \brief Set the last error message needed by C API @@ -67,10 +67,10 @@ void MXAPISetLastError(const char* msg); * \param e the exception * \return the return value of API after exception is handled */ -int MXAPIHandleException(const std::exception &e); +int MXAPIHandleException(const std::exception& e); namespace mxnet { -extern void on_enter_api(const char *function); +extern void on_enter_api(const char* function); extern void on_exit_api(); } #endif // MXNET_C_API_ERROR_H_ diff --git a/include/mxnet/c_api_test.h b/include/mxnet/c_api_test.h index ab662443c29a..5b37262ede8c 100644 --- a/include/mxnet/c_api_test.h +++ b/include/mxnet/c_api_test.h @@ -38,10 +38,10 @@ extern "C" { * used only for the testing purpose. */ MXNET_DLL int MXBuildSubgraphByOpNames(SymbolHandle sym_handle, - const char* prop_name, - const uint32_t num_ops, - const char** op_names, - SymbolHandle* ret_sym_handle); + const char* prop_name, + const uint32_t num_ops, + const char** op_names, + SymbolHandle* ret_sym_handle); /*! * \brief Given a subgraph property name, use the provided op names @@ -60,8 +60,8 @@ MXNET_DLL int MXSetSubgraphPropertyOpNames(const char* prop_name, * op_names to the backend property. */ MXNET_DLL int MXSetSubgraphPropertyOpNamesV2(const char* prop_name, - const uint32_t num_ops, - const char** op_names); + const uint32_t num_ops, + const char** op_names); /*! * \brief Given a subgraph property name, delete the op name set * in the SubgraphPropertyOpNameSet. 
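The NVTX and profiler hooks above nest naturally; given the documented color encoding 256*256*R + 256*G + B, a red range around a region of interest looks like this (a CUDA+NVTX build is assumed, as the comments state):

#include <mxnet/c_api.h>

// Sketch: profile a region, marking it with a pure-red NVTX range.
void ProfileRegion() {
  MXCUDAProfilerStart();
  MXNVTXRangePush("my_region", 256 * 256 * 255);  // R=255, G=0, B=0
  // ... launch the work to be profiled ...
  MXNVTXRangePop();
  MXCUDAProfilerStop();
}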
@@ -73,29 +73,26 @@ MXNET_DLL int MXRemoveSubgraphPropertyOpNames(const char* prop_name); */ MXNET_DLL int MXRemoveSubgraphPropertyOpNamesV2(const char* prop_name); - /*! * \brief Get the value of an environment variable as seen by the backend. * \param name The name of the environment variable * \param value The returned value of the environment variable */ -MXNET_DLL int MXGetEnv(const char* name, - const char** value); +MXNET_DLL int MXGetEnv(const char* name, const char** value); /*! * \brief Set the value of an environment variable from the backend. * \param name The name of the environment variable * \param value The desired value to set the environment variable `name` */ -MXNET_DLL int MXSetEnv(const char* name, - const char* value); +MXNET_DLL int MXSetEnv(const char* name, const char* value); /*! * \brief Get the maximum SM architecture supported by the nvrtc compiler * \param max_arch The maximum supported architecture (e.g. would be 80, if Ampere) * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXGetMaxSupportedArch(uint32_t *max_arch); +MXNET_DLL int MXGetMaxSupportedArch(uint32_t* max_arch); #ifdef __cplusplus } diff --git a/include/mxnet/engine.h b/include/mxnet/engine.h index cdb8998d2e83..9d20fdd43d74 100644 --- a/include/mxnet/engine.h +++ b/include/mxnet/engine.h @@ -178,7 +178,7 @@ class CallbackOnComplete { /*! \brief engine can see content of callback */ friend class ::mxnet::Engine; /*! \brief the real callback */ - void (*callback_)(Engine *, void *, const dmlc::Error *); + void (*callback_)(Engine*, void*, const dmlc::Error*); /*! \brief the engine class passed to callback */ Engine* engine_; /*! \brief the parameter set on callback */ @@ -209,7 +209,7 @@ enum class FnProperty { /*! * \brief Dependency engine that schedules operations. -*/ + */ class MXNET_API Engine { public: /*! \brief on start*/ @@ -266,9 +266,9 @@ class MXNET_API Engine { virtual OprHandle NewOperator(AsyncFn fn, std::vector const& const_vars, std::vector const& mutable_vars, - FnProperty prop = FnProperty::kNormal, + FnProperty prop = FnProperty::kNormal, const char* opr_name = nullptr, - bool wait = false) = 0; + bool wait = false) = 0; /*! * \brief Delete the given operator. * \param op The operator to delete. @@ -299,13 +299,14 @@ class MXNET_API Engine { * \param opr_name The operator name. * \param wait Whether this is a WaitForVar operation */ - virtual void PushAsync(AsyncFn exec_fun, Context exec_ctx, + virtual void PushAsync(AsyncFn exec_fun, + Context exec_ctx, std::vector const& const_vars, std::vector const& mutable_vars, - FnProperty prop = FnProperty::kNormal, - int priority = 0, + FnProperty prop = FnProperty::kNormal, + int priority = 0, const char* opr_name = nullptr, - bool wait = false) = 0; + bool wait = false) = 0; /*! * \brief Schedule the deletion of a variable. * @@ -317,9 +318,7 @@ class MXNET_API Engine { * \param exec_ctx Execution context. * \param var The variable to be deleted. */ - virtual void DeleteVariable(SyncFn delete_fn, - Context exec_ctx, - VarHandle var) = 0; + virtual void DeleteVariable(SyncFn delete_fn, Context exec_ctx, VarHandle var) = 0; /*! * \brief Wait for a variable. * \param var The variable we should wait for. This function returns when the @@ -359,11 +358,12 @@ class MXNET_API Engine { * \param opr_name The operator name. * \tparam SyncFn the synchronous function to be pushed. 
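MXSetEnv/MXGetEnv above give tests a way to flip backend-visible variables without touching the process environment directly; a sketch, with MXNET_ENGINE_TYPE as an example key:

#include <mxnet/c_api_test.h>
#include <cstdio>

// Sketch: set an env var in the backend and read it back.
void ToggleEngineEnv() {
  MXSetEnv("MXNET_ENGINE_TYPE", "NaiveEngine");
  const char* value = nullptr;
  MXGetEnv("MXNET_ENGINE_TYPE", &value);
  if (value != nullptr)
    std::printf("MXNET_ENGINE_TYPE=%s\n", value);
}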
*/ - virtual void PushSync(SyncFn exec_fn, Context exec_ctx, + virtual void PushSync(SyncFn exec_fn, + Context exec_ctx, std::vector const& const_vars, std::vector const& mutable_vars, - FnProperty prop = FnProperty::kNormal, - int priority = 0, + FnProperty prop = FnProperty::kNormal, + int priority = 0, const char* opr_name = nullptr) { this->PushAsync( [exec_fn](RunContext ctx, CallbackOnStart on_start, CallbackOnComplete on_complete) { @@ -398,28 +398,27 @@ class MXNET_API Engine { * \param callback th static callback function. * \param param the paramter passed to callback. */ - inline CallbackOnComplete CreateCallback( - void (*callback)(Engine *, void *, const dmlc::Error *), void *param) { + inline CallbackOnComplete CreateCallback(void (*callback)(Engine*, void*, const dmlc::Error*), + void* param) { CallbackOnComplete ret; ret.callback_ = callback; - ret.engine_ = this; - ret.param_ = param; + ret.engine_ = this; + ret.param_ = param; return ret; } // For each var vector, sort it and remove the duplicated vars. // Also remove vars from read_vars if it also appears in write_vars - inline void DeduplicateVarHandle(std::vector *read_vars, - std::vector *write_vars) { + inline void DeduplicateVarHandle(std::vector* read_vars, + std::vector* write_vars) { std::sort(write_vars->begin(), write_vars->end()); - write_vars->resize(std::unique(write_vars->begin(), write_vars->end()) - - write_vars->begin()); + write_vars->resize(std::unique(write_vars->begin(), write_vars->end()) - write_vars->begin()); std::sort(read_vars->begin(), read_vars->end()); - read_vars->resize(std::unique(read_vars->begin(), read_vars->end()) - - read_vars->begin()); - auto wit = write_vars->begin(); + read_vars->resize(std::unique(read_vars->begin(), read_vars->end()) - read_vars->begin()); + auto wit = write_vars->begin(); auto rtop = read_vars->begin(); for (auto rit = read_vars->begin(); rit != read_vars->end(); ++rit) { - while (wit != write_vars->end() && *wit < *rit) ++wit; + while (wit != write_vars->end() && *wit < *rit) + ++wit; if (wit == write_vars->end() || *wit != *rit) { *rtop = *rit; ++rtop; @@ -435,7 +434,7 @@ class MXNET_API Engine { virtual int set_bulk_size(int) { return 0; } -}; // class Engine +}; // class Engine #endif // DMLC_USE_CXX11 } // namespace mxnet #endif // MXNET_ENGINE_H_ diff --git a/include/mxnet/executor.h b/include/mxnet/executor.h index a432f0fc9e57..c5c3719fade2 100644 --- a/include/mxnet/executor.h +++ b/include/mxnet/executor.h @@ -66,7 +66,7 @@ class Executor { * \param step current step, user can always start from 0 * \param step_left Number of steps left to finish the forward. */ - virtual void PartialForward(bool is_train, int step, int *step_left) = 0; + virtual void PartialForward(bool is_train, int step, int* step_left) = 0; /*! * \brief Perform a Backward operation of the Operator. * This must be called after Forward. @@ -76,17 +76,17 @@ class Executor { * * \param head_grads the gradient of head nodes to be backproped. */ - virtual void Backward(const std::vector &head_grads, bool is_train = true) = 0; + virtual void Backward(const std::vector& head_grads, bool is_train = true) = 0; /*! * \brief print the execution plan info to output stream. * \param os the output stream we like to print to. */ - virtual void Print(std::ostream &os) const {} // NOLINT(*) + virtual void Print(std::ostream& os) const {} // NOLINT(*) /*! * \brief get array of outputs in the executor. * \return array of outputs in the executor. 
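DeduplicateVarHandle above sorts and uniques both vectors and then drops from read_vars anything already present in write_vars; a small sketch of the intended call pattern (the Engine::VarHandle values v1..v3 are assumed to come from the usual variable-creation path):

#include <mxnet/engine.h>
#include <vector>

// Sketch: after the call, read_vars holds {v1, v2} and write_vars holds {v3},
// since v3 appears in the write set and duplicates are removed.
void Dedup(mxnet::Engine* engine,
           mxnet::Engine::VarHandle v1,
           mxnet::Engine::VarHandle v2,
           mxnet::Engine::VarHandle v3) {
  std::vector<mxnet::Engine::VarHandle> read_vars  = {v1, v2, v2, v3};
  std::vector<mxnet::Engine::VarHandle> write_vars = {v3, v3};
  engine->DeduplicateVarHandle(&read_vars, &write_vars);
}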
*/ - virtual const std::vector &outputs() const = 0; + virtual const std::vector& outputs() const = 0; /*! * \brief get input argument map, key is arg name, value is arg's NDArray. * \return input argument map in the executor. @@ -107,64 +107,62 @@ class Executor { * but different input/output shapes. * * \param partial_shaping Whether to allow changing the shape of unspecified arguments. - * \param allow_up_sizing Whether to allow allocating new ndarrays that's larger than the original. - * \param default_ctx the default context of binding. - * \param ctx_map Context mapping group to context. - * \param provided_arg_shapes New shape for arguments. - * \param in_args the NDArray that stores the input arguments. - * \param arg_grads NDArray that is used to store the gradient output of the input arguments. - * \param aux_states NDArray that is used as internal states. - * \return a new executor. + * \param allow_up_sizing Whether to allow allocating new ndarrays that's larger than the + * original. \param default_ctx the default context of binding. \param ctx_map Context mapping + * group to context. \param provided_arg_shapes New shape for arguments. \param in_args the + * NDArray that stores the input arguments. \param arg_grads NDArray that is used to store the + * gradient output of the input arguments. \param aux_states NDArray that is used as internal + * states. \return a new executor. */ - virtual Executor* Reshape(const bool partial_shaping, - const bool allow_up_sizing, - const Context& default_ctx, - const std::map& ctx_map, - const std::unordered_map& - provided_arg_shapes, - std::vector* in_args, - std::vector* arg_grads, - std::vector* aux_states) = 0; + virtual Executor* Reshape( + const bool partial_shaping, + const bool allow_up_sizing, + const Context& default_ctx, + const std::map& ctx_map, + const std::unordered_map& provided_arg_shapes, + std::vector* in_args, + std::vector* arg_grads, + std::vector* aux_states) = 0; /*! * \brief Create an operator by bind symbol with context and arguments. - * If user do not want to compute the gradients of i-th argument, grad_req_type[i] can be kNullOp. + * If user do not want to compute the gradients of i-th argument, grad_req_type[i] can be + * kNullOp. * * \param default_ctx the default context of binding. * \param group2ctx Context mapping group to context. * \param symbol the symbol that specifies the output of Forward pass. * \param in_args the NDArray that stores the input arguments to the symbol. * \param arg_grad_store NDArray that is used to store the gradient output of the input arguments. - * \param grad_req_type requirment type of gradient saving. Can only be in {kNullOp, kAddTo, kWriteTo}. - * \param aux_states NDArray that is used as internal state in op - * \param shared_exec input executor to share memory with. - * \return a new executor. + * \param grad_req_type requirement type of gradient saving. Can only be in {kNullOp, kAddTo, + kWriteTo}. \param aux_states NDArray that is used as internal state in op \param shared_exec + input executor to share memory with. \return a new executor.
*/ - static Executor *Bind(nnvm::Symbol symbol, + static Executor* Bind(nnvm::Symbol symbol, const Context& default_ctx, const std::map& group2ctx, - const std::vector &in_args, - const std::vector &arg_grad_store, - const std::vector &grad_req_type, - const std::vector &aux_states, + const std::vector& in_args, + const std::vector& arg_grad_store, + const std::vector& grad_req_type, + const std::vector& aux_states, Executor* shared_exec = nullptr); - static Executor* SimpleBind(nnvm::Symbol symbol, - const Context& default_ctx, - const std::map& group2ctx, - const std::vector& in_arg_ctxes, - const std::vector& arg_grad_ctxes, - const std::vector& aux_state_ctxes, - const std::unordered_map& arg_shape_map, - const std::unordered_map& arg_dtype_map, - const std::unordered_map& arg_stype_map, - const std::vector& grad_req_types, - const std::unordered_set& param_names, - std::vector* in_args, - std::vector* arg_grads, - std::vector* aux_states, - std::unordered_map* - shared_data_arrays = nullptr, - Executor* shared_exec = nullptr); + static Executor* SimpleBind( + nnvm::Symbol symbol, + const Context& default_ctx, + const std::map& group2ctx, + const std::vector& in_arg_ctxes, + const std::vector& arg_grad_ctxes, + const std::vector& aux_state_ctxes, + const std::unordered_map& arg_shape_map, + const std::unordered_map& arg_dtype_map, + const std::unordered_map& arg_stype_map, + const std::vector& grad_req_types, + const std::unordered_set& param_names, + std::vector* in_args, + std::vector* arg_grads, + std::vector* aux_states, + std::unordered_map* shared_data_arrays = nullptr, + Executor* shared_exec = nullptr); /*! * \brief the prototype of user-defined monitor callback diff --git a/include/mxnet/expr_operator.h b/include/mxnet/expr_operator.h index c28761c0d1b9..8779d23aa6ab 100644 --- a/include/mxnet/expr_operator.h +++ b/include/mxnet/expr_operator.h @@ -33,17 +33,18 @@ namespace mxnet { -template +template inline PrimExpr MakeConstScalar(MXNetDataType t, ValueType value) { - if (t.is_int()) return IntImm(t, static_cast(value)); - if (t.is_float()) return FloatImm(t, static_cast(value)); + if (t.is_int()) + return IntImm(t, static_cast(value)); + if (t.is_float()) + return FloatImm(t, static_cast(value)); // customized type and uint is not supported for MXNet for now LOG(FATAL) << "cannot make const for type " << t; return PrimExpr(); } - -template +template inline PrimExpr make_const(MXNetDataType t, ValueType value) { if (t.lanes() == 1) { return MakeConstScalar(t, value); diff --git a/include/mxnet/imperative.h b/include/mxnet/imperative.h index 76ccf253d904..e4e3f6a938d0 100644 --- a/include/mxnet/imperative.h +++ b/include/mxnet/imperative.h @@ -35,18 +35,18 @@ #include "./ndarray.h" namespace mxnet { - /*! \brief there are three numpy shape flags based on priority. - * GlobalOn - * turn on numpy shape flag globally, it includes thread local. - * The flag can be seen in any thread. - * ThreadLocalOn - * only turn on thread local numpy shape flag, it cannot be seen - * in other threads. - * Off - * turn off numpy shape flag globally. - * */ - enum NumpyShape{Off, ThreadLocalOn, GlobalOn}; - typedef NumpyShape NumpyDefaultDtype; +/*! \brief there are three numpy shape flags based on priority. + * GlobalOn + * turn on numpy shape flag globally, it includes thread local. + * The flag can be seen in any thread. + * ThreadLocalOn + * only turn on thread local numpy shape flag, it cannot be seen + * in other threads. + * Off + * turn off numpy shape flag globally. 
+ * */ +enum NumpyShape { Off, ThreadLocalOn, GlobalOn }; +typedef NumpyShape NumpyDefaultDtype; /*! \brief runtime functions for NDArray */ class Imperative { public: @@ -61,13 +61,14 @@ class Imperative { // interested in (marked variables) bool fresh_out_grad; - AGInfo() : - grad_req(kNullOp), fresh_out_grad(false) {} + AGInfo() : grad_req(kNullOp), fresh_out_grad(false) {} static void Clear(const nnvm::ObjectPtr& node) { - if (node == nullptr || node->info.empty()) return; + if (node == nullptr || node->info.empty()) + return; AGInfo& info = Get(node); - if (info.grad_req != kNullOp) return; + if (info.grad_req != kNullOp) + return; node->info.clear(); } @@ -86,40 +87,38 @@ class Imperative { static bool IsVariable(const nnvm::ObjectPtr& node) { AGInfo& info = Get(node); - return info.grad_req != kNullOp && info.outputs.size() == 1 - && info.out_grads.size() == 1; + return info.grad_req != kNullOp && info.outputs.size() == 1 && info.out_grads.size() == 1; } }; /*! \brief DCInfo datastructure to enable deferred computation */ class DCInfo { public: - explicit DCInfo(const std::vector &inputs, - const std::vector &outputs); + explicit DCInfo(const std::vector& inputs, const std::vector& outputs); /*! \brief Compute the outputs of the associated operator. */ - static void Compute(const NDArray &arr); + static void Compute(const NDArray& arr); - static DCInfo &Get(const nnvm::ObjectPtr &node) { + static DCInfo& Get(const nnvm::ObjectPtr& node) { return dmlc::get(node->info); } - static bool IsNone(const NDArray &arr) { + static bool IsNone(const NDArray& arr) { return arr.deferredcompute_entry_.node == nullptr || arr.deferredcompute_entry_.node->info.empty(); } - static bool IsComputed(const NDArray &arr) { - return IsNone(arr) || - dmlc::get(arr.deferredcompute_entry_.node->info).is_computed_; + static bool IsComputed(const NDArray& arr) { + return IsNone(arr) || dmlc::get(arr.deferredcompute_entry_.node->info).is_computed_; } - static DCInfo &Create(const nnvm::ObjectPtr &node, - const std::vector &inputs, - const std::vector &outputs); + static DCInfo& Create(const nnvm::ObjectPtr& node, + const std::vector& inputs, + const std::vector& outputs); static void Clear(const nnvm::ObjectPtr& node) { - if (node == nullptr || node->info.empty()) return; + if (node == nullptr || node->info.empty()) + return; node->info.clear(); } @@ -146,7 +145,7 @@ class Imperative { * Note that the frontend may have deallocated the NDArray* and the * input_handles stored here may point to invalid memory. */ - std::vector input_handles_; + std::vector input_handles_; /*! \brief Copies of output NDArrays * @@ -168,9 +167,9 @@ class Imperative { } /*! \brief turn on or turn off operator recording for autograd. */ bool set_is_training(bool is_train) { - bool old = is_train_; - is_train_ = is_train; - return old; + bool old = is_train_; + is_train_ = is_train; + return old; } /*! \brief whether operator recording is on. */ bool is_recording() const { @@ -178,15 +177,17 @@ class Imperative { } /*! \brief turn on or turn off operator recording for autograd. */ bool set_is_recording(bool is_recording) { - bool old = is_recording_; - is_recording_ = is_recording; - return old; + bool old = is_recording_; + is_recording_ = is_recording; + return old; } /*! \brief whether deferred compute mode is on. */ - bool is_deferred_compute() const { return is_deferred_compute_; } + bool is_deferred_compute() const { + return is_deferred_compute_; + } /*! \brief turn on or turn off operator recording for autograd. 
*/ bool set_is_deferred_compute(bool is_deferred_compute) { - bool old = is_deferred_compute_; + bool old = is_deferred_compute_; is_deferred_compute_ = is_deferred_compute; return old; } @@ -197,24 +198,22 @@ class Imperative { if (is_np_shape_global_) { return NumpyShape::GlobalOn; } - return is_np_shape_thread_local_ ? - NumpyShape::ThreadLocalOn : - NumpyShape::Off; + return is_np_shape_thread_local_ ? NumpyShape::ThreadLocalOn : NumpyShape::Off; } /*! \brief specify numpy compatibility off, thread local on or global on. */ bool set_is_np_shape(int is_np_shape) { NumpyShape flag = static_cast(is_np_shape); - bool old = this->is_np_shape(); + bool old = this->is_np_shape(); switch (flag) { case GlobalOn: - is_np_shape_global_ = true; + is_np_shape_global_ = true; is_np_shape_thread_local_ = true; break; case ThreadLocalOn: is_np_shape_thread_local_ = true; break; case Off: - is_np_shape_global_ = false; + is_np_shape_global_ = false; is_np_shape_thread_local_ = false; break; } @@ -242,19 +241,19 @@ class Imperative { void RecordOp(nnvm::NodeAttrs&& attrs, const std::vector& inputs, const std::vector& outputs, - const OpStatePtr& state = OpStatePtr(), - std::vector* p_save_inputs = nullptr, + const OpStatePtr& state = OpStatePtr(), + std::vector* p_save_inputs = nullptr, std::vector* p_save_outputs = nullptr); /*! \brief to record operator, return corresponding node. */ void RecordDeferredCompute(nnvm::NodeAttrs&& attrs, const std::vector& inputs, const std::vector& outputs); /*! \brief obtain symbol representation of deferred compute session. */ - nnvm::Symbol GetDeferredComputeSymbol(const std::vector &outputs); + nnvm::Symbol GetDeferredComputeSymbol(const std::vector& outputs); /*! \brief associate arrays with variables for deferred compute */ - void SetDeferredComputeVariable(NDArrayHandle *arrays, SymbolHandle *variables, const int num); + void SetDeferredComputeVariable(NDArrayHandle* arrays, SymbolHandle* variables, const int num); /*! \brief clear info node associated with array */ - void DeferredComputeClear(NDArrayHandle *arrays, const int num); + void DeferredComputeClear(NDArrayHandle* arrays, const int num); /*! \brief */ OpStatePtr Invoke(const Context& default_ctx, const nnvm::NodeAttrs& attrs, @@ -278,7 +277,8 @@ class Imperative { std::vector Backward(const std::vector& outputs, const std::vector& ograds, const std::vector& variables, - bool is_train, bool retain_graph, + bool is_train, + bool retain_graph, bool create_graph); /*! \brief Return the marked nonleaf nodes. */ std::vector ListNonleafVariables(const nnvm::Symbol& sym) const; @@ -311,11 +311,11 @@ class Imperative { backward_bulk_size_ = BulkExecMaxNodeTrainBwd(); } /*! \brief find the input/output ndarrays that are needed for backward */ - void GetBackwardDependency( - const nnvm::ObjectPtr& node, - uint32_t num_inputs, uint32_t num_outputs, - std::vector *p_save_inputs, - std::vector *p_save_outputs); + void GetBackwardDependency(const nnvm::ObjectPtr& node, + uint32_t num_inputs, + uint32_t num_outputs, + std::vector* p_save_inputs, + std::vector* p_save_outputs); /*! \brief indicate whether is training. */ #if DMLC_CXX11_THREAD_LOCAL static thread_local bool is_train_; diff --git a/include/mxnet/io.h b/include/mxnet/io.h index aebc5f663def..4c2d7cfb20ca 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -38,7 +38,7 @@ namespace mxnet { * \brief iterator type * \tparam DType data type */ -template +template class IIterator : public dmlc::DataIter { public: /*! 
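The flag logic above gives GlobalOn precedence over the thread-local setting; assuming the usual Imperative::Get() singleton accessor, the observable behaviour is:

#include <mxnet/imperative.h>

// Sketch: ThreadLocalOn is visible only in the setting thread, while GlobalOn
// flips the flag for every thread, and is what is_np_shape() reports first.
void DemoNumpyShapeFlags() {
  using namespace mxnet;
  Imperative::Get()->set_is_np_shape(NumpyShape::ThreadLocalOn);
  auto flag = Imperative::Get()->is_np_shape();  // ThreadLocalOn, this thread only
  Imperative::Get()->set_is_np_shape(NumpyShape::GlobalOn);
  flag = Imperative::Get()->is_np_shape();       // GlobalOn, in any thread
  (void)flag;
}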
@@ -51,7 +51,7 @@ class IIterator : public dmlc::DataIter { /*! \brief move to next item */ virtual bool Next(void) = 0; /*! \brief get current data */ - virtual const DType &Value(void) const = 0; + virtual const DType& Value(void) const = 0; /*! \brief constructor */ virtual ~IIterator(void) {} /*! \brief store the name of each data, it could be used for making NDArrays */ @@ -94,14 +94,11 @@ struct DataBatch { }; // struct DataBatch /*! \brief typedef the factory function of data iterator */ -typedef std::function *()> DataIteratorFactory; +typedef std::function*()> DataIteratorFactory; /*! * \brief Registry entry for DataIterator factory functions. */ -struct DataIteratorReg - : public dmlc::FunctionRegEntryBase { -}; +struct DataIteratorReg : public dmlc::FunctionRegEntryBase {}; //-------------------------------------------------------------- // The following part are API Registration of Iterators //-------------------------------------------------------------- @@ -117,7 +114,7 @@ struct DataIteratorReg * }); * \endcode */ -#define MXNET_REGISTER_IO_ITER(name) \ +#define MXNET_REGISTER_IO_ITER(name) \ DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) /*! @@ -129,29 +126,26 @@ struct DataIteratorReg class Dataset { public: /*! - * \brief Get the size of the dataset - */ + * \brief Get the size of the dataset + */ virtual uint64_t GetLen(void) const = 0; /*! - * \brief Get the ndarray items given index in dataset - * \param idx the integer index for required data - * \param ret the returned ndarray items - */ + * \brief Get the ndarray items given index in dataset + * \param idx the integer index for required data + * \param ret the returned ndarray items + */ virtual bool GetItem(uint64_t idx, std::vector* ret) = 0; // virtual destructor virtual ~Dataset(void) {} }; // class Dataset /*! \brief typedef the factory function of dataset */ -typedef std::function >&)> DatasetFactory; +typedef std::function >&)> + DatasetFactory; /*! * \brief Registry entry for Dataset factory functions. */ -struct DatasetReg - : public dmlc::FunctionRegEntryBase { -}; +struct DatasetReg : public dmlc::FunctionRegEntryBase {}; //-------------------------------------------------------------- // The following part are API Registration of Datasets //-------------------------------------------------------------- @@ -167,7 +161,7 @@ struct DatasetReg * }); * \endcode */ -#define MXNET_REGISTER_IO_DATASET(name) \ +#define MXNET_REGISTER_IO_DATASET(name) \ DMLC_REGISTRY_REGISTER(::mxnet::DatasetReg, DatasetReg, name) class BatchifyFunction { @@ -182,15 +176,13 @@ class BatchifyFunction { using BatchifyFunctionPtr = std::shared_ptr; /*! \brief typedef the factory function of data sampler */ -typedef std::function >&)> BatchifyFunctionFactory; +typedef std::function >&)> + BatchifyFunctionFactory; /*! * \brief Registry entry for DataSampler factory functions. 
  */
 struct BatchifyFunctionReg
-    : public dmlc::FunctionRegEntryBase<BatchifyFunctionReg, BatchifyFunctionFactory> {
-};
+    : public dmlc::FunctionRegEntryBase<BatchifyFunctionReg, BatchifyFunctionFactory> {};
 //--------------------------------------------------------------
 // The following part are API Registration of Batchify Function
 //--------------------------------------------------------------
@@ -206,7 +198,7 @@ struct BatchifyFunctionReg
  *     });
  * \endcode
  */
-#define MXNET_REGISTER_IO_BATCHIFY_FUNCTION(name) \
+#define MXNET_REGISTER_IO_BATCHIFY_FUNCTION(name) \
   DMLC_REGISTRY_REGISTER(::mxnet::BatchifyFunctionReg, BatchifyFunctionReg, name)
 }  // namespace mxnet
 #endif  // MXNET_IO_H_
diff --git a/include/mxnet/ir/expr.h b/include/mxnet/ir/expr.h
index a9f4ff2bbf70..53053dec674b 100644
--- a/include/mxnet/ir/expr.h
+++ b/include/mxnet/ir/expr.h
@@ -100,7 +100,7 @@ class PrimExprNode : public BaseExprNode {
  */
 class PrimExpr : public BaseExpr {
  public:
-  /*! \brief Cosntructor */
+  /*! \brief Constructor */
   PrimExpr() {}
   /*!
    * \brief Constructor from object ptr.
diff --git a/include/mxnet/kvstore.h b/include/mxnet/kvstore.h
index 0907d2d04e6f..9be22e97e9a8 100644
--- a/include/mxnet/kvstore.h
+++ b/include/mxnet/kvstore.h
@@ -45,9 +45,7 @@ namespace mxnet {
  * kPause allows pausing and resuming of profiler
  * kDump asks profiler to dump output
  */
-enum class KVStoreServerProfilerCommand {
-  kSetConfig, kState, kPause, kDump
-};
+enum class KVStoreServerProfilerCommand { kSetConfig, kState, kPause, kDump };
 
 /*!
  * \brief distributed key-value store
@@ -70,20 +68,22 @@ class KVStore {
    *  - 'dist_*' : multi-machines
    * \return a newly created KVStore.
    */
-  static KVStore *Create(const char *type = "local");
+  static KVStore* Create(const char* type = "local");
 
   /**
    * \brief return the type
    */
-  inline const std::string& type() { return type_; }
+  inline const std::string& type() {
+    return type_;
+  }
 
   /**
    * \brief Set parameters to use low-bit compressed gradients
    * \param compression_type type of compression
    * \param threshold threshold for 2bit compression
    */
-  virtual void SetGradientCompression(const std::vector<std::pair<std::string, std::string> >
-                                      & kwargs) = 0;
+  virtual void SetGradientCompression(
+      const std::vector<std::pair<std::string, std::string>>& kwargs) = 0;
 
   /*!
   * \brief Initialize a list of key-value pair to the store.
@@ -101,8 +101,7 @@ class KVStore {
    * \param keys a list of unique keys
    * \param values a list of values
    */
-  virtual void Init(const std::vector<int>& keys,
-                    const std::vector<NDArray>& values) = 0;
+  virtual void Init(const std::vector<int>& keys, const std::vector<NDArray>& values) = 0;
   /*!
    * \brief Initialize a list of key-value pair to the store.
    * \param keys a list of unique keys in string format
@@ -148,7 +147,7 @@ class KVStore {
    */
   virtual void Push(const std::vector<int>& keys,
                     const std::vector<NDArray>& values,
-                    int priority = 0) = 0;
+                    int priority = 0) = 0;
 
   /*!
    * \brief push a list of key-value pairs into the store
    * \param keys the list of keys in string format
    * \param values the list of values
    */
   virtual void Push(const std::vector<std::string>& str_keys,
                     const std::vector<NDArray>& values,
-                    int priority = 0) = 0;
+                    int priority = 0) = 0;
   /*!
    * \brief pull a list of key-value pairs from the store
    *
@@ -185,7 +184,8 @@ class KVStore {
    */
   virtual void Pull(const std::vector<int>& keys,
                     const std::vector<NDArray*>& values,
-                    int priority = 0, bool ignore_sparse = true) = 0;
+                    int priority       = 0,
+                    bool ignore_sparse = true) = 0;
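
A short usage sketch (not part of the patch) tying the calls above together; weight and grad stand in for NDArrays the caller already owns, and the element types follow the upstream header since the template arguments here are only partially legible:

    KVStore* kv = KVStore::Create("local");
    std::vector<int> keys = {0};
    kv->Init(keys, {weight});                // seed the store with an initial value
    kv->Push(keys, {grad});                  // send a gradient (default priority 0)
    std::vector<NDArray*> outs = {&weight};
    kv->Pull(keys, outs);                    // fetch the aggregated result

   /*!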
* \brief pull a list of key-value pairs from the store * \param keys the list of keys in string format @@ -195,7 +195,8 @@ class KVStore { */ virtual void Pull(const std::vector& str_keys, const std::vector& values, - int priority = 0, bool ignore_sparse = true) = 0; + int priority = 0, + bool ignore_sparse = true) = 0; /*! * \brief broadcast a list of key-value pairs from the store @@ -214,10 +215,9 @@ class KVStore { /*! * \brief broadcast a list of key-value pairs from the store * \param vkeys the list of keys to be pushed in string format - * \param okeys the list of keys to be pulled in string format. Should be the same set of keys in vkeys. - * \param values the list of values to be pushed - * \param outs the list of buffers for the pulled data, they should be preallocated - * \param priority Priority of the action. + * \param okeys the list of keys to be pulled in string format. Should be the same set of keys in + * vkeys. \param values the list of values to be pushed \param outs the list of buffers for the + * pulled data, they should be preallocated \param priority Priority of the action. */ virtual void Broadcast(const std::vector& str_vkeys, const std::vector& str_okeys, @@ -242,10 +242,9 @@ class KVStore { /*! * \brief push and pull a list of key-value pairs from the store * \param vkeys the list of keys to be pushed in string format - * \param okeys the list of keys to be pulled in string format. Should be the same set of keys in vkeys. - * \param values the list of values to be pushed - * \param outs the list of buffers for the pulled data, they should be preallocated - * \param priority Priority of the action. + * \param okeys the list of keys to be pulled in string format. Should be the same set of keys in + * vkeys. \param values the list of values to be pushed \param outs the list of buffers for the + * pulled data, they should be preallocated \param priority Priority of the action. */ virtual void PushPull(const std::vector& str_vkeys, const std::vector& str_okeys, @@ -358,7 +357,8 @@ class KVStore { void set_barrier_before_exit(const bool barrier_before_exit) { #if MXNET_USE_DIST_KVSTORE - if (!IsWorkerNode()) LOG(FATAL) << "barrier_before_exit takes effect only on worker nodes"; + if (!IsWorkerNode()) + LOG(FATAL) << "barrier_before_exit takes effect only on worker nodes"; barrier_before_exit_ = barrier_before_exit; #else LOG(FATAL) << "compile with USE_DIST_KVSTORE=1 to enable barrier"; @@ -415,7 +415,7 @@ class KVStore { * all of them are reached this point. It doesn't guarantee that all * operations issued before are actually finished, such as \ref Push and \ref Pull. 
    */
-  virtual void Barrier() { }
+  virtual void Barrier() {}
 
   /**
    * \brief Send a command to all server nodes
    *
@@ -428,7 +428,7 @@ class KVStore {
    * \param cmd_id the head of the command
    * \param cmd_body the body of the command
    */
-  virtual void SendCommandToServers(int cmd_id, const std::string& cmd_body) { }
+  virtual void SendCommandToServers(int cmd_id, const std::string& cmd_body) {}
 
   /**
    * \brief Sends server profiler commands to all server nodes
    *
@@ -462,7 +462,7 @@ class KVStore {
    *
    * \param controller the user-defined server controller
    */
-  virtual void RunServer(const Controller& controller) { }
+  virtual void RunServer(const Controller& controller) {}
 
  protected:
   /**
diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h
index f9525a28c4d4..dfdca6c6c588 100644
--- a/include/mxnet/lib_api.h
+++ b/include/mxnet/lib_api.h
@@ -47,8 +47,8 @@
 #include
 #if defined(__NVCC__)
-  #include <cuda_runtime.h>
-  #include <curand_kernel.h>
+#include <cuda_runtime.h>
+#include <curand_kernel.h>
 #endif
 
 /* Make sure to update the version number every time you make changes */
@@ -60,9 +60,9 @@
  * see https://labjack.com/news/simple-cpp-symbol-visibility-demo for details
 */
 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
-  #define PRIVATE_SYMBOL
+#define PRIVATE_SYMBOL
 #else
-  #define PRIVATE_SYMBOL __attribute__ ((visibility ("hidden")))
+#define PRIVATE_SYMBOL __attribute__((visibility("hidden")))
 #endif
 
 /*
@@ -94,120 +94,120 @@
 #ifdef __cplusplus
 extern "C" {
-  #endif
+#endif
+/*!
+ * \brief The device type in DLContext.
+ */
+typedef enum {
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLGPU = 2,
   /*!
-   * \brief The device type in DLContext.
+   * \brief Pinned CUDA GPU device by cudaMallocHost
+   * \note kDLCPUPinned = kDLCPU | kDLGPU
    */
-  typedef enum {
-    /*! \brief CPU device */
-    kDLCPU = 1,
-    /*! \brief CUDA GPU device */
-    kDLGPU = 2,
-    /*!
-     * \brief Pinned CUDA GPU device by cudaMallocHost
-     * \note kDLCPUPinned = kDLCPU | kDLGPU
-     */
-    kDLCPUPinned = 3,
-    /*! \brief OpenCL devices. */
-    kDLOpenCL = 4,
-    /*! \brief Vulkan buffer for next generation graphics. */
-    kDLVulkan = 7,
-    /*! \brief Metal for Apple GPU. */
-    kDLMetal = 8,
-    /*! \brief Verilog simulator buffer */
-    kDLVPI = 9,
-    /*! \brief ROCm GPUs for AMD GPUs */
-    kDLROCM = 10,
-    /*!
-     * \brief Reserved extension device type,
-     * used for quickly test extension device
-     * The semantics can differ depending on the implementation.
-     */
-    kDLExtDev = 12,
-  } DLDeviceType;
-
+  kDLCPUPinned = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
   /*!
-   * \brief A Device context for Tensor and operator.
+   * \brief Reserved extension device type,
+   * used to quickly test extension devices.
+   * The semantics can differ depending on the implementation.
    */
-  typedef struct {
-    /*! \brief The device type used in the device. */
-    DLDeviceType device_type;
-    /*! \brief The device index */
-    int device_id;
-  } DLContext;
+  kDLExtDev = 12,
+} DLDeviceType;
+
+/*!
+ * \brief A Device context for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*! \brief The device index */
+  int device_id;
+} DLContext;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+  kDLInt   = 0U,
+  kDLUInt  = 1U,
+  kDLFloat = 2U,
+} DLDataTypeCode;
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ *  Examples
+ *   - float: type_code = 2, bits = 32, lanes=1
+ *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ *   - int8: type_code = 0, bits = 8, lanes=1
+ */
+typedef struct {
   /*!
-   * \brief The type code options DLDataType.
+   * \brief Type code of base types.
+   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+   * footprint, but the value should be one of DLDataTypeCode enum values.
+   * */
+  uint8_t code;
+  /*!
+   * \brief Number of bits, common choices are 8, 16, 32.
    */
-  typedef enum {
-    kDLInt = 0U,
-    kDLUInt = 1U,
-    kDLFloat = 2U,
-  } DLDataTypeCode;
+  uint8_t bits;
+  /*! \brief Number of lanes in the type, used for vector types. */
+  uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
   /*!
-   * \brief The data type the tensor can hold.
+   * \brief The opaque data pointer points to the allocated data. This will be
+   * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
+   * aligned to 256 bytes as in CUDA.
+   *
+   * For given DLTensor, the size of memory required to store the contents of
+   * data is calculated as follows:
    *
-   * Examples
-   *  - float: type_code = 2, bits = 32, lanes=1
-   *  - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
-   *  - int8: type_code = 0, bits = 8, lanes=1
+   * \code{.c}
+   * static inline size_t GetDataSize(const DLTensor* t) {
+   *   size_t size = 1;
+   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
+   *     size *= t->shape[i];
+   *   }
+   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+   *   return size;
+   * }
+   * \endcode
    */
-  typedef struct {
-    /*!
-     * \brief Type code of base types.
-     * We keep it uint8_t instead of DLDataTypeCode for minimal memory
-     * footprint, but the value should be one of DLDataTypeCode enum values.
-     * */
-    uint8_t code;
-    /*!
-     * \brief Number of bits, common choices are 8, 16, 32.
-     */
-    uint8_t bits;
-    /*! \brief Number of lanes in the type, used for vector types. */
-    uint16_t lanes;
-  } DLDataType;
-
+  void* data;
+  /*! \brief The device context of the tensor */
+  DLContext ctx;
+  /*! \brief Number of dimensions */
+  int ndim;
+  /*! \brief The data type of the pointer */
+  DLDataType dtype;
+  /*! \brief The shape of the tensor */
+  int64_t* shape;
   /*!
-   * \brief Plain C Tensor object, does not manage memory.
+   * \brief strides of the tensor (in number of elements, not bytes)
+   * can be nullptr, indicating tensor is compact and row-major.
    */
-  typedef struct {
-    /*!
-     * \brief The opaque data pointer points to the allocated data. This will be
-     * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
-     * aligns to 256 bytes as in CUDA.
-     *
-     * For given DLTensor, the size of memory required to store the contents of
-     * data is calculated as follows:
-     *
-     * \code{.c}
-     * static inline size_t GetDataSize(const DLTensor* t) {
-     *   size_t size = 1;
-     *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
-     *     size *= t->shape[i];
-     *   }
-     *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
-     *   return size;
-     * }
-     * \endcode
-     */
-    void* data;
-    /*! \brief The device context of the tensor */
-    DLContext ctx;
-    /*! \brief Number of dimensions */
-    int ndim;
-    /*! \brief The data type of the pointer*/
-    DLDataType dtype;
-    /*! \brief The shape of the tensor */
-    int64_t* shape;
-    /*!
-     * \brief strides of the tensor (in number of elements, not bytes)
-     * can be nullptr, indicating tensor is compact and row-majored.
-     */
-    int64_t* strides;
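
A small worked example (not part of the patch) of filling in these plain C structs; every name comes from this header, and the 24-byte figure follows directly from the GetDataSize() formula quoted above:

    DLContext gpu0;
    gpu0.device_type = kDLGPU;  // CUDA device ...
    gpu0.device_id   = 0;       // ... with index 0

    DLDataType f32;
    f32.code  = kDLFloat;       // type_code = 2
    f32.bits  = 32;
    f32.lanes = 1;              // scalar, not vectorized

    // A 2x3 tensor of this dtype occupies 2 * 3 * (32 * 1 + 7) / 8 = 24 bytes.

-    /*!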
\brief The offset in bytes to the beginning pointer to data */ - uint64_t byte_offset; - } DLTensor; + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; #ifdef __cplusplus } // DLPACK_EXTERN_C #endif @@ -250,11 +250,11 @@ enum MXDType { kFloat32 = 0, kFloat64 = 1, kFloat16 = 2, - kUint8 = 3, - kInt32 = 4, - kInt8 = 5, - kInt64 = 6, - kUNSET = 100, + kUint8 = 3, + kInt32 = 4, + kInt8 = 5, + kInt64 = 6, + kUNSET = 100, }; /* @@ -288,14 +288,14 @@ struct MXContext { }; enum MXReturnValue { - MX_FAIL = 0, + MX_FAIL = 0, MX_SUCCESS = 1, }; // For sparse tensors, read/write the data from NDarray via pointers. struct MXSparse { // Pointer to data. - void *data{nullptr}; + void* data{nullptr}; // length of (non-zero) data. int64_t data_len; @@ -310,8 +310,13 @@ struct MXSparse { int64_t* indptr = nullptr; int64_t indptr_len; - void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, - int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0); + void set(void* data_ptr, + const int64_t* dims, + int ndims, + void* idx, + int64_t num_idx, + void* idx_ptr = nullptr, + int64_t num_idx_ptr = 0); }; /*! @@ -320,18 +325,27 @@ struct MXSparse { struct MXTensor { MXTensor(); MXTensor(const MXTensor& oth); - MXTensor(void *data_ptr, std::vector shape, MXDType dtype, - size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage); + MXTensor(void* data_ptr, + std::vector shape, + MXDType dtype, + size_t vID, + MXContext mx_ctx, + MXStorageType stype = kDefaultStorage); /*! \brief populate internal tensor fields */ - void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, - size_t vID, MXContext mx_ctx, MXStorageType storage_type); + void setTensor(void* dptr, + MXDType type, + const int64_t* dims, + int ndims, + size_t vID, + MXContext mx_ctx, + MXStorageType storage_type); /*! \brief populate DLTensor fields */ void setDLTensor(); /*! \brief helper function to cast data pointer */ - template + template inline data_type* data() { return reinterpret_cast(data_ptr); } @@ -340,11 +354,11 @@ struct MXTensor { int64_t size() const; /*! \brief helper function to compare two MXTensors */ - bool isSame(const MXTensor &oth) const; + bool isSame(const MXTensor& oth) const; // For dense, data_ptr points to 1D flattened tensor data // For sparse, data_ptr points to MXSparse - void *data_ptr; + void* data_ptr; // shape is in [2,3,4] format to represent high-dim tensor std::vector shape; @@ -371,16 +385,22 @@ typedef void* (*xpu_malloc_t)(void*, int); /*! \brief sparse alloc function to allocate memory inside Forward/Backward functions */ typedef void (*sparse_malloc_t)(void*, int, int, int, void**, int64_t**, int64_t**); /*! \brief resource malloc function to allocate ndarrays for graph passes */ -typedef void (*nd_malloc_t)(const void* _ndarray_alloc, const int64_t* shapes, int num_shapes, - const char* dev_str, int dev_id, int dtype, const char* name, - int isArg, void** data); +typedef void (*nd_malloc_t)(const void* _ndarray_alloc, + const int64_t* shapes, + int num_shapes, + const char* dev_str, + int dev_id, + int dtype, + const char* name, + int isArg, + void** data); /*! 
\brief GPU stream pointer, is void* when not compiled with CUDA */ #if defined(__NVCC__) - typedef cudaStream_t mx_stream_t; - typedef curandStatePhilox4_32_10_t mx_gpu_rand_t; +typedef cudaStream_t mx_stream_t; +typedef curandStatePhilox4_32_10_t mx_gpu_rand_t; #else - typedef void* mx_stream_t; - typedef void* mx_gpu_rand_t; +typedef void* mx_stream_t; +typedef void* mx_gpu_rand_t; #endif typedef std::mt19937 mx_cpu_rand_t; @@ -394,15 +414,20 @@ class PassResource { public: PassResource(std::unordered_map* new_args, std::unordered_map* new_aux, - nd_malloc_t nd_malloc, const void* nd_alloc); + nd_malloc_t nd_malloc, + const void* nd_alloc); // allocate new arg param, adds to args map, returns newly allocated tensor - MXTensor* alloc_arg(const std::string& name, const std::vector& shapes, - const MXContext &ctx, MXDType dtype) const; + MXTensor* alloc_arg(const std::string& name, + const std::vector& shapes, + const MXContext& ctx, + MXDType dtype) const; // allocate new aux param, adds to aux map, returns newly allocated tensor - MXTensor* alloc_aux(const std::string& name, const std::vector& shapes, - const MXContext &ctx, MXDType dtype) const; + MXTensor* alloc_aux(const std::string& name, + const std::vector& shapes, + const MXContext& ctx, + MXDType dtype) const; private: std::unordered_map* new_args_; @@ -416,10 +441,15 @@ class PassResource { */ class OpResource { public: - OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, - xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream, - sparse_malloc_t sparse_malloc_fp, void* sparse_alloc_fp, - void* rng_cpu_states, void* rng_gpu_states); + OpResource(xpu_malloc_t cpu_malloc_fp, + void* cpu_alloc_fp, + xpu_malloc_t gpu_malloc_fp, + void* gpu_alloc_fp, + void* stream, + sparse_malloc_t sparse_malloc_fp, + void* sparse_alloc_fp, + void* rng_cpu_states, + void* rng_gpu_states); /*! \brief allocate cpu memory controlled by MXNet */ void* alloc_cpu(int size) const; @@ -452,11 +482,11 @@ class OpResource { /*! \brief lambda function to return allocated memory handle */ void *cpu_alloc, *gpu_alloc; /*! \brief cuda stream passed from MXNet */ - void *cuda_stream; + void* cuda_stream; /*! \brief sparse allocation lambda function */ sparse_malloc_t sparse_malloc; /*! \brief lambda function to return allocated sparse memory handle */ - void *sparse_alloc; + void* sparse_alloc; /*! \brief cpu and gpu rng fully inited and seeded states */ void *rand_cpu_states, *rand_gpu_states; }; @@ -484,7 +514,7 @@ std::string getShapeAt(const std::string& shape, unsigned index); * Examples: * * getDtypeAt("[1]", 0) returns "1" - * getDtypeAt("[1,2]", 1) returns "2" + * getDtypeAt("[1,2]", 1) returns "2" */ std::string getDtypeAt(const std::string& dtype, unsigned index); @@ -492,7 +522,7 @@ std::string getDtypeAt(const std::string& dtype, unsigned index); * \brief Json utility to parse serialized subgraph symbol */ /*! \brief Types of JSON objects */ -enum JsonType {ERR, STR, NUM, LIST, MAP}; +enum JsonType { ERR, STR, NUM, LIST, MAP }; /*! 
\brief definition of JSON objects */
 struct JsonVal {
@@ -505,7 +535,7 @@ struct JsonVal {
   explicit JsonVal(int n);
   // complex constructor
   JsonVal(JsonType t, int n, std::string s);
-  bool operator<(const JsonVal &o) const;
+  bool operator<(const JsonVal& o) const;
 
   // convert JSON object back to JSON-compatible string
   std::string dump() const;
@@ -526,7 +556,7 @@ struct JsonVal {
   static JsonVal parse_map(const std::string& json, unsigned int* idx);
 
   // generic parse function
-  static JsonVal parse(const std::string& json, unsigned int *idx);
+  static JsonVal parse(const std::string& json, unsigned int* idx);
 
   // debug function to convert data structure to a debug string
   std::string toString() const;
@@ -547,7 +577,7 @@ class Graph;
 
 // Representation of an input/output to a node
 struct NodeEntry {
   Node* node;  // other node that's producing/consuming inputs/outputs
-  int entry;  // entry index from other node (ie. output index from producing node)
+  int entry;   // entry index from other node (ie. output index from producing node)
 };
 
 // Representation of a node in the graph
@@ -559,19 +589,17 @@ class Node {
   void _setPassResource(PassResource* res_);
 
   /* \brief allocate an arg tensor for this node */
-  void alloc_arg(const std::vector<int64_t>& shapes,
-                 const MXContext &ctx, MXDType dtype);
+  void alloc_arg(const std::vector<int64_t>& shapes, const MXContext& ctx, MXDType dtype);
 
   /* \brief allocate an aux tensor for this node */
-  void alloc_aux(const std::vector<int64_t>& shapes,
-                 const MXContext &ctx, MXDType dtype);
-
-  std::string op;  // operator name (ie. Convolution)
-  std::string name;  // unique node name (ie. conv_0 or conv_1)
-  MXTensor* tensor;  // tensor data for input nodes
-  std::vector<NodeEntry> inputs;  // set of inputs to the node
-  std::vector<NodeEntry> outputs;  // set of outputs from the node
-  std::vector<Graph*> subgraphs;  // set of subgraphs within this node
+  void alloc_aux(const std::vector<int64_t>& shapes, const MXContext& ctx, MXDType dtype);
+
+  std::string op;                  // operator name (ie. Convolution)
+  std::string name;                // unique node name (ie. conv_0 or conv_1)
+  MXTensor* tensor;                // tensor data for input nodes
+  std::vector<NodeEntry> inputs;   // set of inputs to the node
+  std::vector<NodeEntry> outputs;  // set of outputs from the node
+  std::vector<Graph*> subgraphs;   // set of subgraphs within this node
   std::unordered_map<std::string, std::string> attrs;  // node attributes
 
 private:
@@ -599,7 +627,8 @@ class Graph {
   std::string toString() const;
 
   /* \brief visits a node "n" */
-  void _dfs_util(Node* n, std::unordered_set<Node*>* to_visit,
+  void _dfs_util(Node* n,
+                 std::unordered_set<Node*>* to_visit,
                  std::function<void(Node*)> handler) const;
 
   /* \brief post-order DFS graph traversal */
@@ -668,8 +697,7 @@ class CustomOpSelector {
    *   candidates - indices of nodes to include in subgraph
    *   keep - indices of nodes to keep in subgraph
   */
-  virtual void Filter(const std::vector<int>& candidates,
-                      std::vector<int>* keep) {
+  virtual void Filter(const std::vector<int>& candidates, std::vector<int>* keep) {
    keep->insert(keep->end(), candidates.begin(), candidates.end());
  }
   /* \brief Reset any selector state, called after growing subgraph, before filter
@@ -688,14 +716,16 @@ class CustomStatefulOp {
   CustomStatefulOp();
   virtual ~CustomStatefulOp();
 
-  template<class A, typename... Ts>
-  static CustomStatefulOp* create(Ts...args) {
+  template <class A, typename... Ts>
+  static CustomStatefulOp* create(Ts...
args) { CustomStatefulOp* op = new A(args...); - op->created = true; + op->created = true; return op; } - bool wasCreated() { return created; } + bool wasCreated() { + return created; + } virtual MXReturnValue Forward(std::vector* inputs, std::vector* outputs, @@ -714,35 +744,34 @@ class CustomStatefulOp { }; /*! \brief Custom Operator function templates */ -typedef MXReturnValue (*fcomp_t)(const std::unordered_map& attributes, +typedef MXReturnValue (*fcomp_t)(const std::unordered_map& attributes, std::vector* inputs, std::vector* outputs, const OpResource& res); -typedef MXReturnValue (*parseAttrs_t)(const std::unordered_map& attributes, - int* num_inputs, int* num_outputs); -typedef MXReturnValue (*inferType_t)(const std::unordered_map& attributes, +typedef MXReturnValue (*parseAttrs_t)( + const std::unordered_map& attributes, + int* num_inputs, + int* num_outputs); +typedef MXReturnValue (*inferType_t)(const std::unordered_map& attributes, std::vector* in_types, std::vector* out_types); -typedef MXReturnValue (*inferSType_t)(const std::unordered_map& attributes, - std::vector* in_storage_types, - std::vector* out_storage_types); -typedef MXReturnValue (*inferShape_t)(const std::unordered_map& attributes, - std::vector >* in_shapes, - std::vector >* out_shapes); -typedef MXReturnValue (*mutateInputs_t)(const std::unordered_map& attributes, - std::vector* input_indices); -typedef MXReturnValue (*createOpState_t)(const std::unordered_map& attributes, - const MXContext& ctx, - const std::vector >& in_shapes, - const std::vector in_types, - CustomStatefulOp**); +typedef MXReturnValue (*inferSType_t)( + const std::unordered_map& attributes, + std::vector* in_storage_types, + std::vector* out_storage_types); +typedef MXReturnValue (*inferShape_t)( + const std::unordered_map& attributes, + std::vector >* in_shapes, + std::vector >* out_shapes); +typedef MXReturnValue (*mutateInputs_t)( + const std::unordered_map& attributes, + std::vector* input_indices); +typedef MXReturnValue (*createOpState_t)( + const std::unordered_map& attributes, + const MXContext& ctx, + const std::vector >& in_shapes, + const std::vector in_types, + CustomStatefulOp**); /*! * \brief Class to hold custom operator registration @@ -816,19 +845,20 @@ class CustomPass { }; /*! \brief Custom Subgraph Create function template */ -typedef MXReturnValue (*supportedOps_t)(const mxnet::ext::Graph *graph, std::vector* ids, - const std::unordered_map& options); -typedef MXReturnValue (*createSelector_t)(const mxnet::ext::Graph *graph, - CustomOpSelector** sel_inst, - const std::unordered_map& options); -typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int subgraph_id, - bool* accept, - const std::unordered_map& options, - std::unordered_map* attrs); +typedef MXReturnValue (*supportedOps_t)( + const mxnet::ext::Graph* graph, + std::vector* ids, + const std::unordered_map& options); +typedef MXReturnValue (*createSelector_t)( + const mxnet::ext::Graph* graph, + CustomOpSelector** sel_inst, + const std::unordered_map& options); +typedef MXReturnValue (*reviewSubgraph_t)( + const mxnet::ext::Graph* subgraph, + int subgraph_id, + bool* accept, + const std::unordered_map& options, + std::unordered_map* attrs); /*! 
* \brief An abstract class for subgraph property @@ -839,8 +869,7 @@ class CustomPartitioner { explicit CustomPartitioner(const char* backend_name); - CustomPartitioner& addStrategy(const char* prop_name, - const char* sg_name); + CustomPartitioner& addStrategy(const char* prop_name, const char* sg_name); CustomPartitioner& setSupportedOps(const char* prop_name, supportedOps_t fn); @@ -885,7 +914,7 @@ class Registry { * \returns new object associated with registered name */ T& add(const char* name) { - T *entry = new T(name); + T* entry = new T(name); entries.push_back(entry); return *entry; } @@ -910,34 +939,35 @@ class Registry { * Annoyingly, the concat_ and concat macros are necessary to * be able to use __COUNTER__ in an identifier name */ -#define MX_STR_CONCAT_(__a, __b) __a ## __b -#define MX_STR_CONCAT(__a, __b) MX_STR_CONCAT_(__a, __b) +#define MX_STR_CONCAT_(__a, __b) __a##__b +#define MX_STR_CONCAT(__a, __b) MX_STR_CONCAT_(__a, __b) /*! \brief convert a token to a string */ #define MX_STRINGIFY(x) #x -#define MX_TOSTRING(x) MX_STRINGIFY(x) +#define MX_TOSTRING(x) MX_STRINGIFY(x) /*! \brief declare a variable with custom name */ -#define MX_REGISTER_NAME_(Name) MXNet ## _CustomOp ## _ ## Name -#define MX_REGISTER_DEF_(Name) mxnet::ext::CustomOp MX_REGISTER_NAME_(Name) +#define MX_REGISTER_NAME_(Name) MXNet##_CustomOp##_##Name +#define MX_REGISTER_DEF_(Name) mxnet::ext::CustomOp MX_REGISTER_NAME_(Name) -#define MX_REGISTER_PROP_NAME_(Name) MXNet ## _CustomSubProp ## _ ## Name -#define MX_REGISTER_PROP_DEF_(Name) mxnet::ext::CustomPartitioner MX_REGISTER_PROP_NAME_(Name) +#define MX_REGISTER_PROP_NAME_(Name) MXNet##_CustomSubProp##_##Name +#define MX_REGISTER_PROP_DEF_(Name) mxnet::ext::CustomPartitioner MX_REGISTER_PROP_NAME_(Name) -#define MX_REGISTER_PASS_NAME_(Name) MXNet ## _CustomPass ## _ ## Name -#define MX_REGISTER_PASS_DEF_(Name) mxnet::ext::CustomPass MX_REGISTER_PASS_NAME_(Name) +#define MX_REGISTER_PASS_NAME_(Name) MXNet##_CustomPass##_##Name +#define MX_REGISTER_PASS_DEF_(Name) mxnet::ext::CustomPass MX_REGISTER_PASS_NAME_(Name) /*! 
\brief assign a var to a value */ -#define REGISTER_OP(Name) MX_STR_CONCAT(MX_REGISTER_DEF_(Name), __COUNTER__) = \ - mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) +#define REGISTER_OP(Name) \ + MX_STR_CONCAT(MX_REGISTER_DEF_(Name), __COUNTER__) = \ + mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) -#define REGISTER_PARTITIONER(Name) \ +#define REGISTER_PARTITIONER(Name) \ MX_STR_CONCAT(MX_REGISTER_PROP_DEF_(Name), __COUNTER__) = \ - mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) + mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) -#define REGISTER_PASS(Name) \ +#define REGISTER_PASS(Name) \ MX_STR_CONCAT(MX_REGISTER_PASS_DEF_(Name), __COUNTER__) = \ - mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) + mxnet::ext::Registry::get()->add(MX_TOSTRING(Name)) /* -------------- BELOW ARE CTYPE FUNCTIONS PROTOTYPES --------------- */ @@ -950,94 +980,172 @@ class Registry { typedef int (*opRegSize_t)(void); #define MXLIB_OPREGGET_STR "_opRegGet" -typedef int (*opRegGet_t)(int idx, const char** name, int *isSGop, - const char*** forward_ctx, mxnet::ext::fcomp_t** forward_fp, - int* forward_count, const char*** backward_ctx, - mxnet::ext::fcomp_t** backward_fp, int* backward_count, - const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, - int* create_op_count, mxnet::ext::parseAttrs_t* parse, - mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, - mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate); +typedef int (*opRegGet_t)(int idx, + const char** name, + int* isSGop, + const char*** forward_ctx, + mxnet::ext::fcomp_t** forward_fp, + int* forward_count, + const char*** backward_ctx, + mxnet::ext::fcomp_t** backward_fp, + int* backward_count, + const char*** create_op_ctx, + mxnet::ext::createOpState_t** create_op_fp, + int* create_op_count, + mxnet::ext::parseAttrs_t* parse, + mxnet::ext::inferType_t* type, + mxnet::ext::inferSType_t* stype, + mxnet::ext::inferShape_t* shape, + mxnet::ext::mutateInputs_t* mutate); #define MXLIB_OPCALLFREE_STR "_opCallFree" typedef int (*opCallFree_t)(void* ptr); #define MXLIB_OPCALLPARSEATTRS_STR "_opCallParseAttrs" -typedef int (*opCallParseAttrs_t)(parseAttrs_t parseAttrs, const char* const* keys, - const char* const* vals, int num, - int* num_in, int* num_out); +typedef int (*opCallParseAttrs_t)(parseAttrs_t parseAttrs, + const char* const* keys, + const char* const* vals, + int num, + int* num_in, + int* num_out); #define MXLIB_OPCALLINFERSHAPE_STR "_opCallInferShape" -typedef int (*opCallInferShape_t)(inferShape_t inferShape, const char* const* keys, - const char* const* vals, int num, - unsigned int** inshapes, int* indims, int num_in, - unsigned int*** mod_inshapes, int** mod_indims, - unsigned int*** outshapes, int** outdims, int num_out); +typedef int (*opCallInferShape_t)(inferShape_t inferShape, + const char* const* keys, + const char* const* vals, + int num, + unsigned int** inshapes, + int* indims, + int num_in, + unsigned int*** mod_inshapes, + int** mod_indims, + unsigned int*** outshapes, + int** outdims, + int num_out); #define MXLIB_OPCALLINFERTYPE_STR "_opCallInferType" -typedef int (*opCallInferType_t)(inferType_t inferType, const char* const* keys, - const char* const* vals, int num, - int* intypes, int num_in, int* outtypes, int num_out); +typedef int (*opCallInferType_t)(inferType_t inferType, + const char* const* keys, + const char* const* vals, + int num, + int* intypes, + int num_in, + int* outtypes, + int num_out); #define MXLIB_OPCALLINFERSTYPE_STR "_opCallInferSType" 
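
To make the C hooks below concrete, here is a minimal custom-operator sketch (not part of the patch): a forward function matching the fcomp_t signature above, registered through the REGISTER_OP macro. The element-wise doubling is invented for illustration, and the CustomOp setter methods are assumed from the upstream lib_api.h:

    using namespace mxnet::ext;

    MXReturnValue myForward(const std::unordered_map<std::string, std::string>& attrs,
                            std::vector<MXTensor>* inputs,
                            std::vector<MXTensor>* outputs,
                            const OpResource& res) {
      float* in  = inputs->at(0).data<float>();
      float* out = outputs->at(0).data<float>();
      for (int64_t i = 0; i < inputs->at(0).size(); ++i)
        out[i] = 2 * in[i];  // hypothetical computation
      return MX_SUCCESS;
    }

    REGISTER_OP(my_double).setForward(myForward, "cpu");

MXNet never links against such a library directly: it resolves the _op*/_part*/_pass* entry points by their string names at runtime (dlopen/dlsym on POSIX, LoadLibrary on Windows), which is why each hook below pairs a function-pointer typedef with a *_STR symbol-name constant.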
-typedef int (*opCallInferSType_t)(inferSType_t inferSType, const char* const* keys, - const char* const* vals, int num, - int* intypes, int num_in, int* outtypes, int num_out); +typedef int (*opCallInferSType_t)(inferSType_t inferSType, + const char* const* keys, + const char* const* vals, + int num, + int* intypes, + int num_in, + int* outtypes, + int num_out); #define MXLIB_OPCALLFCOMP_STR "_opCallFCompute" -typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, - const char* const* vals, int num, - const int64_t** inshapes, int* indims, - void** indata, int* intypes, - size_t* inIDs, const char** indev_type, - int* indev_id, int num_in, - const int64_t** outshapes, int* outdims, - void** outdata, int* outtypes, - size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, - xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - sparse_malloc_t sparse_malloc, void* sparse_alloc, - int* instypes, int* outstypes, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states); +typedef int (*opCallFComp_t)(fcomp_t fcomp, + const char* const* keys, + const char* const* vals, + int num, + const int64_t** inshapes, + int* indims, + void** indata, + int* intypes, + size_t* inIDs, + const char** indev_type, + int* indev_id, + int num_in, + const int64_t** outshapes, + int* outdims, + void** outdata, + int* outtypes, + size_t* outIDs, + const char** outdev_type, + int* outdev_id, + int num_out, + xpu_malloc_t cpu_malloc, + void* cpu_alloc, + xpu_malloc_t gpu_malloc, + void* gpu_alloc, + void* cuda_stream, + sparse_malloc_t sparse_malloc, + void* sparse_alloc, + int* instypes, + int* outstypes, + void** in_indices, + void** out_indices, + void** in_indptr, + void** out_indptr, + int64_t* in_indices_shapes, + int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, + int64_t* out_indptr_shapes, + void* rng_cpu_states, + void* rng_gpu_states); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" -typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, - const char* const* vals, int num, - int** mutate_indices, int* indices_size); +typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, + const char* const* keys, + const char* const* vals, + int num, + int** mutate_indices, + int* indices_size); #define MXLIB_OPCALLCREATEOPSTATE_STR "_opCallCreateOpState" -typedef int (*opCallCreateOpState_t)(createOpState_t create_op, const char* const* keys, - const char* const* vals, int num, const char* dev_type, - int dev_id, unsigned int** inshapes, int* indims, - int num_in, const int* intypes, void** state_op); +typedef int (*opCallCreateOpState_t)(createOpState_t create_op, + const char* const* keys, + const char* const* vals, + int num, + const char* dev_type, + int dev_id, + unsigned int** inshapes, + int* indims, + int num_in, + const int* intypes, + void** state_op); #define MXLIB_OPCALLDESTROYOPSTATE_STR "_opCallDestroyOpState" typedef int (*opCallDestroyOpState_t)(void* state_op); #define MXLIB_OPCALLFSTATEFULCOMP_STR "_opCallFStatefulCompute" -typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, - const int64_t** inshapes, int* indims, - void** indata, int* intypes, - size_t* inIDs, const char** indev_type, - int* indev_id, int num_in, - const int64_t** outshapes, int* outdims, - void** 
outdata, int* outtypes, - size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, - xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - sparse_malloc_t sparse_malloc, void* sparse_alloc, - int* instypes, int* outstypes, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states); +typedef int (*opCallFStatefulComp_t)(int is_forward, + void* state_op, + const int64_t** inshapes, + int* indims, + void** indata, + int* intypes, + size_t* inIDs, + const char** indev_type, + int* indev_id, + int num_in, + const int64_t** outshapes, + int* outdims, + void** outdata, + int* outtypes, + size_t* outIDs, + const char** outdev_type, + int* outdev_id, + int num_out, + xpu_malloc_t cpu_malloc, + void* cpu_alloc, + xpu_malloc_t gpu_malloc, + void* gpu_alloc, + void* stream, + sparse_malloc_t sparse_malloc, + void* sparse_alloc, + int* instypes, + int* outstypes, + void** in_indices, + void** out_indices, + void** in_indptr, + void** out_indptr, + int64_t* in_indices_shapes, + int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, + int64_t* out_indptr_shapes, + void* rng_cpu_states, + void* rng_gpu_states); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1046,52 +1154,81 @@ typedef int (*partRegSize_t)(void); typedef int (*partRegGetCount_t)(int idx, const char** name); #define MXLIB_PARTREGGET_STR "_partRegGet" -typedef void (*partRegGet_t)(int part_idx, int stg_idx, const char** strategy, - supportedOps_t* supportedOps, createSelector_t* createSelector, - reviewSubgraph_t* reviewSubgraph, const char** op_name); +typedef void (*partRegGet_t)(int part_idx, + int stg_idx, + const char** strategy, + supportedOps_t* supportedOps, + createSelector_t* createSelector, + reviewSubgraph_t* reviewSubgraph, + const char** op_name); #define MXLIB_PARTCALLSUPPORTEDOPS_STR "_partCallSupportedOps" -typedef int (*partCallSupportedOps_t)(supportedOps_t supportedOps, const char *json, - int num_ids, int *ids, const char* const* opt_keys, - const char* const* opt_vals, int num_opts); +typedef int (*partCallSupportedOps_t)(supportedOps_t supportedOps, + const char* json, + int num_ids, + int* ids, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts); #define MXLIB_PARTCALLCREATESELECTOR_STR "_partCallCreateSelector" -typedef int (*partCallCreateSelector_t)(createSelector_t createSelector, const char *json, - void** selector, const char* const* opt_keys, - const char* const* opt_vals, int num_opts); +typedef int (*partCallCreateSelector_t)(createSelector_t createSelector, + const char* json, + void** selector, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts); #define MXLIB_PARTCALLSELECT_STR "_partCallSelect" typedef void (*partCallSelect_t)(void* sel_inst, int nodeID, int* selected); #define MXLIB_PARTCALLSELECTINPUT_STR "_partCallSelectInput" -typedef void (*partCallSelectInput_t)(void* sel_inst, int nodeID, int input_nodeID, - int* selected); +typedef void (*partCallSelectInput_t)(void* sel_inst, int nodeID, int input_nodeID, int* selected); #define MXLIB_PARTCALLSELECTOUTPUT_STR "_partCallSelectOutput" -typedef void (*partCallSelectOutput_t)(void* sel_inst, int nodeID, int output_nodeID, - int* selected); +typedef void (*partCallSelectOutput_t)(void* sel_inst, + int nodeID, + int 
output_nodeID, + int* selected); #define MXLIB_PARTCALLFILTER_STR "_partCallFilter" -typedef void (*partCallFilter_t)(void* sel_inst, int* candidates, int num_candidates, - int** keep, int* num_keep); +typedef void (*partCallFilter_t)(void* sel_inst, + int* candidates, + int num_candidates, + int** keep, + int* num_keep); #define MXLIB_PARTCALLRESET_STR "_partCallReset" typedef void (*partCallReset_t)(void* sel_inst); #define MXLIB_PARTCALLREVIEWSUBGRAPH_STR "_partCallReviewSubgraph" -typedef int (*partCallReviewSubgraph_t)(reviewSubgraph_t reviewSubgraph, const char *json, - int subgraph_id, int *accept, const char* const* opt_keys, - const char* const* opt_vals, int num_opts, - char*** attr_keys, char*** attr_vals, int *num_attrs, - const char* const* arg_names, int num_args, - void* const* arg_data, const int64_t* const* arg_shapes, - const int* arg_dims, const int* arg_types, - const size_t* arg_IDs, const char* const* arg_dev_type, +typedef int (*partCallReviewSubgraph_t)(reviewSubgraph_t reviewSubgraph, + const char* json, + int subgraph_id, + int* accept, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts, + char*** attr_keys, + char*** attr_vals, + int* num_attrs, + const char* const* arg_names, + int num_args, + void* const* arg_data, + const int64_t* const* arg_shapes, + const int* arg_dims, + const int* arg_types, + const size_t* arg_IDs, + const char* const* arg_dev_type, const int* arg_dev_id, - const char* const* aux_names, int num_aux, - void* const* aux_data, const int64_t* const* aux_shapes, - const int* aux_dims, const int* aux_types, - const size_t* aux_IDs, const char* const* aux_dev_type, + const char* const* aux_names, + int num_aux, + void* const* aux_data, + const int64_t* const* aux_shapes, + const int* aux_dims, + const int* aux_types, + const size_t* aux_IDs, + const char* const* aux_dev_type, const int* aux_dev_id); #define MXLIB_PASSREGSIZE_STR "_passRegSize" @@ -1101,19 +1238,32 @@ typedef int (*passRegSize_t)(void); typedef void (*passRegGet_t)(int pass_idx, graphPass_t* graphPass, const char** pass_name); #define MXLIB_PASSCALLGRAPHPASS_STR "_passCallGraphPass" -typedef int (*passCallGraphPass_t)(graphPass_t graphPass, const char *in_graph, - char** out_graph, const char* const* opt_keys, - const char* const* opt_vals, int num_opts, - const char* pass_name, const char* const* arg_names, - int num_args, void* const* arg_data, - const int64_t* const* arg_shapes, const int* arg_dims, - const int* arg_types, const size_t* arg_IDs, - const char* const* arg_dev_type, const int* arg_dev_id, - const char* const* aux_names, int num_aux, - void* const* aux_data, const int64_t* const* aux_shapes, - const int* aux_dims, const int* aux_types, - const size_t* aux_IDs, const char* const* aux_dev_type, - const int* aux_dev_id, nd_malloc_t nd_malloc, +typedef int (*passCallGraphPass_t)(graphPass_t graphPass, + const char* in_graph, + char** out_graph, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts, + const char* pass_name, + const char* const* arg_names, + int num_args, + void* const* arg_data, + const int64_t* const* arg_shapes, + const int* arg_dims, + const int* arg_types, + const size_t* arg_IDs, + const char* const* arg_dev_type, + const int* arg_dev_id, + const char* const* aux_names, + int num_aux, + void* const* aux_data, + const int64_t* const* aux_shapes, + const int* aux_dims, + const int* aux_types, + const size_t* aux_IDs, + const char* const* aux_dev_type, + const int* aux_dev_id, + nd_malloc_t nd_malloc, 
const void* nd_alloc); #define MXLIB_INITIALIZE_STR "initialize" @@ -1133,8 +1283,11 @@ class CustomStatefulOpWrapper { public: ~CustomStatefulOpWrapper(); explicit CustomStatefulOpWrapper(CustomStatefulOp* inst, opCallDestroyOpState_t destroy) - : instance(inst), destroy_(destroy) {} - CustomStatefulOp* get_instance() { return instance; } + : instance(inst), destroy_(destroy) {} + CustomStatefulOp* get_instance() { + return instance; + } + private: CustomStatefulOp* instance; opCallDestroyOpState_t destroy_; @@ -1152,194 +1305,315 @@ class CustomStatefulOpWrapper { } // namespace mxnet extern "C" { - /*! \brief returns MXNet library version */ - MX_INT_RET _opVersion(); - - /*! \brief returns number of ops registered in this library */ - MX_INT_RET _opRegSize(); - - /*! \brief returns operator registration at specified index */ - MX_VOID_RET _opRegGet(int idx, const char** name, int *isSGop, - const char*** forward_ctx, mxnet::ext::fcomp_t** forward_fp, - int* forward_count, const char*** backward_ctx, - mxnet::ext::fcomp_t** backward_fp, int* backward_count, - const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, - int* create_op_count, mxnet::ext::parseAttrs_t* parse, - mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, - mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate); - - /*! \brief calls free from the external library for library allocated arrays */ - MX_VOID_RET _opCallFree(void* ptr); - - /*! \brief returns status of calling parse attributes function for operator from library */ - MX_INT_RET _opCallParseAttrs(mxnet::ext::parseAttrs_t parseAttrs, const char* const* keys, - const char* const* vals, int num, - int* num_in, int* num_out); - - /*! \brief returns status of calling inferShape function for operator from library */ - MX_INT_RET _opCallInferShape(mxnet::ext::inferShape_t inferShape, const char* const* keys, - const char* const* vals, int num, - unsigned int** inshapes, int* indims, int num_in, - unsigned int*** mod_inshapes, int** mod_indims, - unsigned int*** outshapes, int** outdims, int num_out); - - /*! \brief returns status of calling inferType function for operator from library */ - MX_INT_RET _opCallInferType(mxnet::ext::inferType_t inferType, const char* const* keys, - const char* const* vals, int num, - int* intypes, int num_in, int* outtypes, int num_out); - - /*! \brief returns status of calling inferSType function for operator from library */ - MX_INT_RET _opCallInferSType(mxnet::ext::inferSType_t inferSType, const char* const* keys, - const char* const* vals, int num, - int* instypes, int num_in, int* outstypes, int num_out); - - /*! \brief returns status of calling Forward/Backward function for operator from library */ - MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, +/*! \brief returns MXNet library version */ +MX_INT_RET _opVersion(); + +/*! \brief returns number of ops registered in this library */ +MX_INT_RET _opRegSize(); + +/*! 
\brief returns operator registration at specified index */ +MX_VOID_RET _opRegGet(int idx, + const char** name, + int* isSGop, + const char*** forward_ctx, + mxnet::ext::fcomp_t** forward_fp, + int* forward_count, + const char*** backward_ctx, + mxnet::ext::fcomp_t** backward_fp, + int* backward_count, + const char*** create_op_ctx, + mxnet::ext::createOpState_t** create_op_fp, + int* create_op_count, + mxnet::ext::parseAttrs_t* parse, + mxnet::ext::inferType_t* type, + mxnet::ext::inferSType_t* stype, + mxnet::ext::inferShape_t* shape, + mxnet::ext::mutateInputs_t* mutate); + +/*! \brief calls free from the external library for library allocated arrays */ +MX_VOID_RET _opCallFree(void* ptr); + +/*! \brief returns status of calling parse attributes function for operator from library */ +MX_INT_RET _opCallParseAttrs(mxnet::ext::parseAttrs_t parseAttrs, + const char* const* keys, const char* const* vals, - int num, const int64_t** inshapes, int* indims, void** indata, - int* intypes, size_t* inIDs, const char** indev_type, int* indev_id, - int num_in, const int64_t** outshapes, int* outdims, void** outdata, - int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, - void* cpu_alloc, - mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, - void* cuda_stream, - mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, - int* instypes, int* outstypes, void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states); - - /*! \brief returns status of calling mutateInputs function for operator from library */ - MX_INT_RET _opCallMutateInputs(mxnet::ext::mutateInputs_t mutate, const char* const* keys, - const char* const* vals, int num, - int** mutate_indices, int* indices_size); - - /*! \brief returns status of calling createStatefulOp function for operator from library */ - MX_INT_RET _opCallCreateOpState(mxnet::ext::createOpState_t create_op, const char* const* keys, - const char* const* vals, int num, const char* dev_type, - int dev_id, unsigned int** inshapes, int* indims, - int num_in, const int* intypes, void** state_op); - - /*! \brief returns status of deleting StatefulOp instance for operator from library */ - MX_VOID_RET _opCallDestroyOpState(void* state_op); - - /*! \brief returns status of calling Stateful Forward/Backward for operator from library */ - MX_INT_RET _opCallFStatefulCompute(int is_forward, void* state_op, const int64_t** inshapes, - int* indims, void** indata, int* intypes, size_t* inIDs, - const char** indev_type, int* indev_id, int num_in, - const int64_t** outshapes, int* outdims, void** outdata, - int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, - mxnet::ext::xpu_malloc_t cpu_malloc, - void* cpu_alloc, mxnet::ext::xpu_malloc_t gpu_malloc, - void* gpu_alloc, - void* stream, mxnet::ext::sparse_malloc_t sparse_malloc, - void* sparse_alloc, int* instypes, int* outstypes, - void** in_indices, void** out_indices, void** in_indptr, - void** out_indptr, int64_t* in_indices_shapes, - int64_t* out_indices_shapes, int64_t* in_indptr_shapes, - int64_t* out_indptr_shapes, - void* rng_cpu_states, void* rng_gpu_states); - - /*! 
\brief returns number of partitioners registered in this library */ - MX_INT_RET _partRegSize(); - - /* returns number of strategies registered for partitioner - * at specified index */ - MX_INT_RET _partRegGetCount(int idx, const char** name); - - /*! \brief returns partitioner registration at specified index */ - MX_VOID_RET _partRegGet(int part_idx, int stg_idx, const char** strategy, - mxnet::ext::supportedOps_t* supportedOps, - mxnet::ext::createSelector_t* createSelector, - mxnet::ext::reviewSubgraph_t* reviewSubgraph, const char** op_name); - - /*! \brief returns status of calling supported ops function from library */ - MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, const char *json, - int num_ids, int *ids, const char* const* opt_keys, - const char* const* opt_vals, int num_opts); - - /*! \brief returns status of calling create selector function from library */ - MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, const char *json, - void** selector, const char* const* opt_keys, - const char* const* opt_vals, int num_opts); - - /*! \brief returns status of calling select function from library */ - MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected); - - /*! \brief returns status of calling select input function from library */ - MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, - int input_nodeID, int* selected); - - /*! \brief returns status of calling select output function from library */ - MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, - int output_nodeID, int* selected); - - /*! \brief returns status of calling filter function from library */ - MX_VOID_RET _partCallFilter(void* sel_inst, int* candidates, int num_candidates, - int** keep, int* num_keep); - - /*! \brief returns status of calling reset selector function from library */ - MX_VOID_RET _partCallReset(void* sel_inst); - - /*! \brief returns status of calling review subgraph function from library */ - MX_INT_RET _partCallReviewSubgraph(mxnet::ext::reviewSubgraph_t reviewSubgraph, const char *json, - int subgraph_id, int *accept, const char* const* opt_keys, - const char* const* opt_vals, int num_opts, - char*** attr_keys, char*** attr_vals, int *num_attrs, - const char* const* arg_names, int num_args, - void* const* arg_data, const int64_t* const* arg_shapes, - const int* arg_dims, const int* arg_types, - const size_t* arg_IDs, const char* const* arg_dev_type, - const int* arg_dev_id, - const char* const* aux_names, int num_aux, - void* const* aux_data, const int64_t* const* aux_shapes, - const int* aux_dims, const int* aux_types, - const size_t* aux_IDs, const char* const* aux_dev_type, - const int* aux_dev_id); - - /*! \brief returns number of graph passes registered in this library */ - MX_INT_RET _passRegSize(); - - /*! \brief returns pass registration at specified index */ - MX_VOID_RET _passRegGet(int pass_idx, mxnet::ext::graphPass_t* graphPass, - const char** pass_name); - - /*! 
\brief returns status of calling graph pass function from library */ - MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, const char *json, - char** out_graph, const char* const* opt_keys, - const char* const* opt_vals, int num_opts, - const char* pass_name, const char* const* arg_names, int num_args, - void* const* arg_data, const int64_t* const* arg_shapes, - const int* arg_dims, const int* arg_types, - const size_t* arg_IDs, const char* const* arg_dev_type, - const int* arg_dev_id, const char* const* aux_names, int num_aux, - void* const* aux_data, const int64_t* const* aux_shapes, - const int* aux_dims, const int* aux_types, - const size_t* aux_IDs, const char* const* aux_dev_type, - const int* aux_dev_id, mxnet::ext::nd_malloc_t nd_malloc, - const void* nd_alloc); + int num, + int* num_in, + int* num_out); - /*! - * \brief Checks if the MXNet version is supported by the library. - * If supported, initializes the library. - * \param version MXNet version number passed to library and defined as: - * MXNET_VERSION = (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) - * \return Non-zero value on error i.e. library incompatible with passed MXNet version - */ +/*! \brief returns status of calling inferShape function for operator from library */ +MX_INT_RET _opCallInferShape(mxnet::ext::inferShape_t inferShape, + const char* const* keys, + const char* const* vals, + int num, + unsigned int** inshapes, + int* indims, + int num_in, + unsigned int*** mod_inshapes, + int** mod_indims, + unsigned int*** outshapes, + int** outdims, + int num_out); + +/*! \brief returns status of calling inferType function for operator from library */ +MX_INT_RET _opCallInferType(mxnet::ext::inferType_t inferType, + const char* const* keys, + const char* const* vals, + int num, + int* intypes, + int num_in, + int* outtypes, + int num_out); + +/*! \brief returns status of calling inferSType function for operator from library */ +MX_INT_RET _opCallInferSType(mxnet::ext::inferSType_t inferSType, + const char* const* keys, + const char* const* vals, + int num, + int* instypes, + int num_in, + int* outstypes, + int num_out); + +/*! \brief returns status of calling Forward/Backward function for operator from library */ +MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, + const char* const* keys, + const char* const* vals, + int num, + const int64_t** inshapes, + int* indims, + void** indata, + int* intypes, + size_t* inIDs, + const char** indev_type, + int* indev_id, + int num_in, + const int64_t** outshapes, + int* outdims, + void** outdata, + int* outtypes, + size_t* outIDs, + const char** outdev_type, + int* outdev_id, + int num_out, + mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, + mxnet::ext::xpu_malloc_t gpu_malloc, + void* gpu_alloc, + void* cuda_stream, + mxnet::ext::sparse_malloc_t sparse_malloc, + void* sparse_alloc, + int* instypes, + int* outstypes, + void** in_indices, + void** out_indices, + void** in_indptr, + void** out_indptr, + int64_t* in_indices_shapes, + int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, + int64_t* out_indptr_shapes, + void* rng_cpu_states, + void* rng_gpu_states); + +/*! \brief returns status of calling mutateInputs function for operator from library */ +MX_INT_RET _opCallMutateInputs(mxnet::ext::mutateInputs_t mutate, + const char* const* keys, + const char* const* vals, + int num, + int** mutate_indices, + int* indices_size); + +/*! 
\brief returns status of calling createStatefulOp function for operator from library */ +MX_INT_RET _opCallCreateOpState(mxnet::ext::createOpState_t create_op, + const char* const* keys, + const char* const* vals, + int num, + const char* dev_type, + int dev_id, + unsigned int** inshapes, + int* indims, + int num_in, + const int* intypes, + void** state_op); + +/*! \brief returns status of deleting StatefulOp instance for operator from library */ +MX_VOID_RET _opCallDestroyOpState(void* state_op); + +/*! \brief returns status of calling Stateful Forward/Backward for operator from library */ +MX_INT_RET _opCallFStatefulCompute(int is_forward, + void* state_op, + const int64_t** inshapes, + int* indims, + void** indata, + int* intypes, + size_t* inIDs, + const char** indev_type, + int* indev_id, + int num_in, + const int64_t** outshapes, + int* outdims, + void** outdata, + int* outtypes, + size_t* outIDs, + const char** outdev_type, + int* outdev_id, + int num_out, + mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, + mxnet::ext::xpu_malloc_t gpu_malloc, + void* gpu_alloc, + void* stream, + mxnet::ext::sparse_malloc_t sparse_malloc, + void* sparse_alloc, + int* instypes, + int* outstypes, + void** in_indices, + void** out_indices, + void** in_indptr, + void** out_indptr, + int64_t* in_indices_shapes, + int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, + int64_t* out_indptr_shapes, + void* rng_cpu_states, + void* rng_gpu_states); + +/*! \brief returns number of partitioners registered in this library */ +MX_INT_RET _partRegSize(); + +/* returns number of strategies registered for partitioner + * at specified index */ +MX_INT_RET _partRegGetCount(int idx, const char** name); + +/*! \brief returns partitioner registration at specified index */ +MX_VOID_RET _partRegGet(int part_idx, + int stg_idx, + const char** strategy, + mxnet::ext::supportedOps_t* supportedOps, + mxnet::ext::createSelector_t* createSelector, + mxnet::ext::reviewSubgraph_t* reviewSubgraph, + const char** op_name); + +/*! \brief returns status of calling supported ops function from library */ +MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, + const char* json, + int num_ids, + int* ids, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts); + +/*! \brief returns status of calling create selector function from library */ +MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, + const char* json, + void** selector, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts); + +/*! \brief returns status of calling select function from library */ +MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected); + +/*! \brief returns status of calling select input function from library */ +MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, int input_nodeID, int* selected); + +/*! \brief returns status of calling select output function from library */ +MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, int output_nodeID, int* selected); + +/*! \brief returns status of calling filter function from library */ +MX_VOID_RET _partCallFilter(void* sel_inst, + int* candidates, + int num_candidates, + int** keep, + int* num_keep); + +/*! \brief returns status of calling reset selector function from library */ +MX_VOID_RET _partCallReset(void* sel_inst); + +/*! 
\brief returns status of calling review subgraph function from library */ +MX_INT_RET _partCallReviewSubgraph(mxnet::ext::reviewSubgraph_t reviewSubgraph, + const char* json, + int subgraph_id, + int* accept, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts, + char*** attr_keys, + char*** attr_vals, + int* num_attrs, + const char* const* arg_names, + int num_args, + void* const* arg_data, + const int64_t* const* arg_shapes, + const int* arg_dims, + const int* arg_types, + const size_t* arg_IDs, + const char* const* arg_dev_type, + const int* arg_dev_id, + const char* const* aux_names, + int num_aux, + void* const* aux_data, + const int64_t* const* aux_shapes, + const int* aux_dims, + const int* aux_types, + const size_t* aux_IDs, + const char* const* aux_dev_type, + const int* aux_dev_id); + +/*! \brief returns number of graph passes registered in this library */ +MX_INT_RET _passRegSize(); + +/*! \brief returns pass registration at specified index */ +MX_VOID_RET _passRegGet(int pass_idx, mxnet::ext::graphPass_t* graphPass, const char** pass_name); + +/*! \brief returns status of calling graph pass function from library */ +MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, + const char* json, + char** out_graph, + const char* const* opt_keys, + const char* const* opt_vals, + int num_opts, + const char* pass_name, + const char* const* arg_names, + int num_args, + void* const* arg_data, + const int64_t* const* arg_shapes, + const int* arg_dims, + const int* arg_types, + const size_t* arg_IDs, + const char* const* arg_dev_type, + const int* arg_dev_id, + const char* const* aux_names, + int num_aux, + void* const* aux_data, + const int64_t* const* aux_shapes, + const int* aux_dims, + const int* aux_types, + const size_t* aux_IDs, + const char* const* aux_dev_type, + const int* aux_dev_id, + mxnet::ext::nd_malloc_t nd_malloc, + const void* nd_alloc); + +/*! + * \brief Checks if the MXNet version is supported by the library. + * If supported, initializes the library. + * \param version MXNet version number passed to library and defined as: + * MXNET_VERSION = (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) + * \return Non-zero value on error i.e. library incompatible with passed MXNet version + */ #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) - __declspec(dllexport) mxnet::ext::MXReturnValue __cdecl +__declspec(dllexport) mxnet::ext::MXReturnValue __cdecl #else - mxnet::ext::MXReturnValue +mxnet::ext::MXReturnValue #endif - initialize(int version); + initialize(int version); - MX_INT_RET _msgSize(); +MX_INT_RET _msgSize(); - /*! \brief returns operator registration at specified index */ - MX_VOID_RET _msgGet(int idx, const char** msg); +/*! \brief returns operator registration at specified index */ +MX_VOID_RET _msgGet(int idx, const char** msg); } // extern "C" #endif // MXNET_LIB_API_H_ diff --git a/include/mxnet/libinfo.h b/include/mxnet/libinfo.h index 66511421da02..d7ef85b1efb5 100644 --- a/include/mxnet/libinfo.h +++ b/include/mxnet/libinfo.h @@ -74,14 +74,12 @@ #endif /*! 
\brief Error message for using gpu when MXNET_USE_CUDA==0 */ -#define MXNET_GPU_NOT_ENABLED_ERROR "GPU is not enabled" - +#define MXNET_GPU_NOT_ENABLED_ERROR "GPU is not enabled" #ifndef MXNET_USE_TENSORRT #define MXNET_USE_TENSORRT 0 #endif - #ifndef MXNET_USE_BLAS_ATLAS #define MXNET_USE_BLAS_ATLAS 0 #endif @@ -154,7 +152,6 @@ enum : unsigned { CPU_AVX, CPU_AVX2, - // Multiprocessing / CPU / System OPENMP, SSE, @@ -192,7 +189,6 @@ enum : unsigned { MAX_FEATURES }; - struct EnumNames { static const std::vector names; }; @@ -203,9 +199,10 @@ struct LibInfo { const std::array& getFeatures() { return m_lib_features; } + private: std::array m_lib_features; - static std::unique_ptr m_inst; + static std::unique_ptr m_inst; }; /*! diff --git a/include/mxnet/node/container.h b/include/mxnet/node/container.h index e164f64a9184..12c527cf2e37 100644 --- a/include/mxnet/node/container.h +++ b/include/mxnet/node/container.h @@ -50,14 +50,13 @@ class ArrayNode : public Object { * \tparam Converter a struct that contains converting function * \tparam TIter the content iterator type. */ -template +template class IterAdapter { public: - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename Converter::ResultType; - using pointer = typename Converter::ResultType*; - using reference = typename Converter::ResultType&; // NOLINT(*) + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename Converter::ResultType; + using pointer = typename Converter::ResultType*; + using reference = typename Converter::ResultType&; // NOLINT(*) using iterator_category = typename std::iterator_traits::iterator_category; explicit IterAdapter(TIter iter) : iter_(iter) {} @@ -69,10 +68,10 @@ class IterAdapter { return IterAdapter(iter_ + offset); } - template + template typename std::enable_if::value, - typename T::difference_type>::type - inline operator-(const IterAdapter& rhs) const { + typename T::difference_type>::type inline + operator-(const IterAdapter& rhs) const { return iter_ - rhs.iter_; } @@ -98,8 +97,8 @@ class IterAdapter { * operator[] only provide const acces, use Set to mutate the content. * \tparam T The content NodeRef type. */ -template::value>::type > +template ::value>::type> class Array : public ObjectRef { public: /*! @@ -112,14 +111,14 @@ class Array : public ObjectRef { * \brief move constructor * \param other source */ - Array(Array && other) { // NOLINT(*) + Array(Array&& other) { // NOLINT(*) data_ = std::move(other.data_); } /*! * \brief copy constructor * \param other source */ - Array(const Array &other) { // NOLINT(*) + Array(const Array& other) { // NOLINT(*) data_ = std::move(other.data_); } /*! @@ -133,7 +132,7 @@ class Array : public ObjectRef { * \param end end of iterator * \tparam IterType The type of iterator */ - template + template Array(IterType begin, IterType end) { assign(begin, end); } @@ -141,14 +140,14 @@ class Array : public ObjectRef { * \brief constructor from initializer list * \param init The initalizer list */ - Array(std::initializer_list init) { // NOLINT(*) + Array(std::initializer_list init) { // NOLINT(*) assign(init.begin(), init.end()); } /*! * \brief constructor from vector * \param init The vector */ - Array(const std::vector& init) { // NOLINT(*) + Array(const std::vector& init) { // NOLINT(*) assign(init.begin(), init.end()); } /*! @@ -168,7 +167,7 @@ class Array : public ObjectRef { * \param other The source of assignment * \return reference to self. 
*/ - Array& operator=(Array && other) { + Array& operator=(Array&& other) { data_ = std::move(other.data_); return *this; } @@ -177,7 +176,7 @@ class Array : public ObjectRef { * \param other The source of assignment * \return reference to self. */ - Array& operator=(const Array & other) { + Array& operator=(const Array& other) { data_ = other.data_; return *this; } @@ -187,7 +186,7 @@ class Array : public ObjectRef { * \param end end of iterator * \tparam IterType The type of iterator */ - template + template void assign(IterType begin, IterType end) { auto n = make_object(); for (IterType it = begin; it != end; ++it) { @@ -201,12 +200,12 @@ class Array : public ObjectRef { * \return the i-th element. */ inline const T operator[](size_t i) const { - return DowncastNoCheck( - static_cast(data_.get())->data[i]); + return DowncastNoCheck(static_cast(data_.get())->data[i]); } /*! \return The size of the array */ inline size_t size() const { - if (data_.get() == nullptr) return 0; + if (data_.get() == nullptr) + return 0; return static_cast(data_.get())->data.size(); } /*! @@ -218,9 +217,9 @@ class Array : public ObjectRef { * \return Handle to the internal node container(which ganrantees to be unique) */ inline ArrayNode* CopyOnWrite() { - if (data_.get() == nullptr || !data_.unique()) { + if (data_.get() == nullptr || !data_.unique()) { runtime::ObjectPtr n = make_object(); - n->data = static_cast(data_.get())->data; + n->data = static_cast(data_.get())->data; runtime::ObjectPtr(std::move(n)).swap(data_); } return static_cast(data_.get()); @@ -248,7 +247,7 @@ class Array : public ObjectRef { */ inline void Set(size_t i, const T& value) { ArrayNode* n = this->CopyOnWrite(); - n->data[i] = value; + n->data[i] = value; } /*! \return whether array is empty */ inline bool empty() const { @@ -260,10 +259,11 @@ class Array : public ObjectRef { * \tparam F the type of the mutation function. * \note This function performs copy on write optimization. */ - template + template inline void MutateByApply(F fmutate) { ArrayNode* ptr = static_cast(data_.get()); - if (ptr == nullptr) return; + if (ptr == nullptr) + return; if (data_.unique()) { // Copy on write optimization. // Perform inplace update because this is an unique copy. @@ -271,8 +271,8 @@ class Array : public ObjectRef { // It is important to use move here // to make prevent the element's ref count from increasing // so fmutate itself can perform copy-on-write optimization - T old_elem = DowncastNoCheck(std::move(ptr->data[i])); - T new_elem = fmutate(std::move(old_elem)); + T old_elem = DowncastNoCheck(std::move(ptr->data[i])); + T new_elem = fmutate(std::move(old_elem)); ptr->data[i] = std::move(new_elem); } } else { @@ -305,12 +305,10 @@ class Array : public ObjectRef { return DowncastNoCheck(n); } }; - using iterator = IterAdapter::const_iterator>; + using iterator = IterAdapter::const_iterator>; - using reverse_iterator = IterAdapter< - ValueConverter, - std::vector::const_reverse_iterator>; + using reverse_iterator = + IterAdapter::const_reverse_iterator>; /*! 
\return begin iterator */ inline iterator begin() const { diff --git a/include/mxnet/node/node.h b/include/mxnet/node/node.h index 76bf0e67fad0..18a2a35ead22 100644 --- a/include/mxnet/node/node.h +++ b/include/mxnet/node/node.h @@ -46,17 +46,17 @@ namespace mxnet { -using runtime::TypeIndex; using runtime::Object; +using runtime::TypeIndex; // We strictly restrict ObjectPtr to ::mxnet::runtime // as it may conflict with ::nnvm::ObjectPtr // using runtime::ObjectPtr; -using runtime::ObjectRef; -using runtime::GetRef; using runtime::Downcast; -using runtime::ObjectHash; -using runtime::ObjectEqual; +using runtime::GetRef; using runtime::make_object; +using runtime::ObjectEqual; +using runtime::ObjectHash; +using runtime::ObjectRef; } // namespace mxnet diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h index 4e43d87a87c8..2fec1768ea86 100644 --- a/include/mxnet/op_attr_types.h +++ b/include/mxnet/op_attr_types.h @@ -79,7 +79,7 @@ struct OpContext { * \return the mshadow stream * \tparam xpu the device type of the stream */ - template + template inline mshadow::Stream* get_stream() const { return run_ctx.get_stream(); } @@ -150,18 +150,16 @@ class OpStatePtr { /* \brief Create a OpStatePtr with state of type T. * \param args Arguments passed to T's constructor. */ - template + template static OpStatePtr Create(Args&&... args) { OpStatePtr ret; auto state = new T(std::forward(args)...); - auto var = Engine::Get()->NewVariable(); - ret.ptr_.reset( - new OpState(var, state), - [](OpState* p) { - Engine::Get()->DeleteVariable([](RunContext s) {}, Context::CPU(), p->var); - delete reinterpret_cast(p->state); - delete p; - }); + auto var = Engine::Get()->NewVariable(); + ret.ptr_.reset(new OpState(var, state), [](OpState* p) { + Engine::Get()->DeleteVariable([](RunContext s) {}, Context::CPU(), p->var); + delete reinterpret_cast(p->state); + delete p; + }); return ret; } @@ -170,7 +168,7 @@ class OpStatePtr { return ptr_->var; } /* \brief Get state of type T */ - template + template T& get_state() const { return *reinterpret_cast(ptr_->state); } @@ -214,10 +212,10 @@ class OpStatePtr { * * \note Register under "FCreateLayerOp" */ -using FCreateOpState = std::function& in_type)>; +using FCreateOpState = std::function& in_type)>; /*! * \brief Whether the operator always produces the same @@ -232,7 +230,7 @@ using THasDeterministicOutput = bool; /*! * \brief Execution mode of this operator. */ -using FExecType = std::function; +using FExecType = std::function; /*! * \brief Resiger a compute function for stateful operator. * OpStatePtr is a pointer type, it's content is mutable even if @@ -240,11 +238,11 @@ using FExecType = std::function; * * \note Register under "FStatefulCompute" and "FStatefulCompute" */ -using FStatefulCompute = std::function& inputs, - const std::vector& req, - const std::vector& outputs)>; +using FStatefulCompute = std::function& inputs, + const std::vector& req, + const std::vector& outputs)>; /*! * \brief Resiger a compute function for stateful operator using NDArray interface. * OpStatePtr is a pointer type, it's content is mutable even if @@ -252,19 +250,18 @@ using FStatefulCompute = std::function" and "FStatefulComputeEx" */ -using FStatefulComputeEx = std::function& inputs, - const std::vector& req, - const std::vector& outputs)>; +using FStatefulComputeEx = std::function& inputs, + const std::vector& req, + const std::vector& outputs)>; /*! * \brief The resource request from the operator. 
* An operator could register ResourceRequestEx, or ResourceRequest, or neither. * * \note Register under "FResourceRequest" */ -using FResourceRequest = std::function< - std::vector (const NodeAttrs& n)>; +using FResourceRequest = std::function(const NodeAttrs& n)>; /*! * \brief The resource request from the operator. * An operator could register ResourceRequestEx, or ResourceRequest, or neither. @@ -273,38 +270,38 @@ using FResourceRequest = std::function< * * \note Register under "FResourceRequestEx" */ -using FResourceRequestEx = std::function< - std::vector (const NodeAttrs& n, - const int dev_mask, - const DispatchMode dispatch_mode)>; +using FResourceRequestEx = + std::function(const NodeAttrs& n, + const int dev_mask, + const DispatchMode dispatch_mode)>; /*! * \brief Register an operator called as a NDArray function * * \note Register under "FNDArrayFunction" */ -using FNDArrayFunction = std::function& inputs, - std::vector* outputs)>; +using FNDArrayFunction = std::function& inputs, + std::vector* outputs)>; /*! * \brief Register a compute function for simple stateless forward only operator * * \note Register under "FCompute" and "FCompute" */ -using FCompute = std::function& inputs, - const std::vector& req, - const std::vector& outputs)>; +using FCompute = std::function& inputs, + const std::vector& req, + const std::vector& outputs)>; /*! * \brief Register an NDArray compute function for simple stateless forward only operator * \note Register under "FComputeEx" and "FComputeEx" * Dispatched only when inferred dispatch_mode is FDispatchComputeEx */ -using FComputeEx = std::function& inputs, - const std::vector& req, - const std::vector& outputs)>; +using FComputeEx = std::function& inputs, + const std::vector& req, + const std::vector& outputs)>; /*! * \brief Register a storage and dispatch mode inference function based on @@ -312,23 +309,23 @@ using FComputeEx = std::function* in_attrs, - std::vector* out_attrs)>; +using FInferStorageType = std::function* in_attrs, + std::vector* out_attrs)>; /*! * \brief Register a quantized node creation function based on the attrs of the node * \note Register under "FQuantizedOp" for non-quantized operators */ -using FQuantizable = std::function; +using FQuantizable = std::function; /*! * \brief Register a quantized node creation function based on the attrs of the node * \note Register under "FQuantizedOp" for non-quantized operators */ -using FQuantizedOp = std::function; +using FQuantizedOp = std::function; /*! * \brief Register a function to determine if the output of a quantized operator @@ -336,30 +333,29 @@ using FQuantizedOp = std::function; * taking int8 data types while accumulating in int32, e.g. quantized_conv. * \note Register under "FNeedRequantize" for non-quantized operators */ -using FNeedRequantize = std::function; +using FNeedRequantize = std::function; /*! * \brief Register a function to determine if the input of a quantized operator * needs to be quantized. This is usually used for the quantized operators * which can handle fp32 inputs directly. */ -using FAvoidQuantizeInput = std::function; +using FAvoidQuantizeInput = std::function< + bool(const NodeAttrs& attrs, const size_t index, const std::string quantize_granularity)>; /*! * \brief Register a function to determine if the input of a quantized operator * needs to be calibrated. This is usually used for the quantized operators * which need calibration on its input. 
*/ -using FNeedCalibrateInput = std::function (const NodeAttrs& attrs)>; +using FNeedCalibrateInput = std::function(const NodeAttrs& attrs)>; /*! * \brief Register a function to determine if the output of a quantized operator * needs to be calibrated. This is usually used for the quantized operators * which need calibration on its output. */ -using FNeedCalibrateOutput = std::function (const NodeAttrs& attrs)>; +using FNeedCalibrateOutput = std::function(const NodeAttrs& attrs)>; } // namespace mxnet diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index d813c74fa9b6..268460fd7c25 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -66,11 +66,11 @@ class Operator { * need, epecial case like Batch Norm requires. * \sa OpReqType, OpContext */ - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_states) = 0; + virtual void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data, + const std::vector& aux_states) = 0; /*! * \brief Perform a Backward Operation, write gradient to the in_grad. * @@ -99,17 +99,18 @@ class Operator { * \param aux_states Auxiliary states of operator. Normally operator doesn't need * \sa OperatorProperty, OpReqType, OpContext */ - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_states) { + virtual void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data, + const std::vector& req, + const std::vector& in_grad, + const std::vector& aux_states) { LOG(FATAL) << "Backward is not implemented"; } /*! \return [Deprecated] execution type of the operator */ - virtual ExecType exec_type() const final { // NOLINT(*) exec_type has been moved to OperatorProperty + virtual ExecType exec_type() + const final { // NOLINT(*) exec_type has been moved to OperatorProperty return ExecType::kSync; } }; @@ -197,9 +198,9 @@ class OperatorProperty { * \return true if the shape inference is successful, false if there is not enough information. * \throws dmlc::Error if the known arg_shapes are inconsistent. */ - virtual bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const = 0; + virtual bool InferShape(mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape, + mxnet::ShapeVector* aux_shape) const = 0; /*! * \brief infer the data types of outputs and unknown input arguments * \param in_type the type of input arguments of the operator @@ -217,25 +218,28 @@ class OperatorProperty { * \return true if the type inference is successful, false if there is not enough information. * \throws dmlc::Error if the known arg_types are inconsistent. 
*/ - virtual bool InferType(std::vector *in_type, - std::vector *out_type, - std::vector *aux_type) const { + virtual bool InferType(std::vector* in_type, + std::vector* out_type, + std::vector* aux_type) const { CHECK_LE(in_type->size(), this->ListArguments().size()); int n_in = this->ListArguments().size(); for (unsigned i = 0; i < in_type->size(); ++i) { - CHECK(in_type->at(i) == mshadow::default_type_flag || - in_type->at(i) == -1) << "Unsupported data type " << in_type->at(i); + CHECK(in_type->at(i) == mshadow::default_type_flag || in_type->at(i) == -1) + << "Unsupported data type " << in_type->at(i); } in_type->clear(); - for (int i = 0; i < n_in; ++i ) in_type->push_back(mshadow::default_type_flag); + for (int i = 0; i < n_in; ++i) + in_type->push_back(mshadow::default_type_flag); int n_out = this->ListOutputs().size(); out_type->clear(); - for (int i = 0; i < n_out; ++i ) out_type->push_back(mshadow::default_type_flag); + for (int i = 0; i < n_out; ++i) + out_type->push_back(mshadow::default_type_flag); int n_aux = this->ListAuxiliaryStates().size(); aux_type->clear(); - for (int i = 0; i < n_aux; ++i ) aux_type->push_back(mshadow::default_type_flag); + for (int i = 0; i < n_aux; ++i) + aux_type->push_back(mshadow::default_type_flag); return true; } /*! @@ -254,8 +258,9 @@ class OperatorProperty { * \param in_type dtype of the input ndarrays * \return the created operator */ - virtual Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, - std::vector *in_type) const { + virtual Operator* CreateOperatorEx(Context ctx, + mxnet::ShapeVector* in_shape, + std::vector* in_type) const { std::vector out_type, aux_type; mxnet::ShapeVector out_shape, aux_shape; out_type.resize(this->ListOutputs().size()); @@ -282,8 +287,7 @@ class OperatorProperty { * \param in_shape The input shape to the operator, corresponds to shapes of in_data. * \return Additional resource request */ - virtual std::vector ForwardResource( - const mxnet::ShapeVector &in_shape) const { + virtual std::vector ForwardResource(const mxnet::ShapeVector& in_shape) const { return std::vector(); } /*! @@ -293,8 +297,7 @@ class OperatorProperty { * \param in_shape The input shape to the operator, corresponds to shapes of in_data. * \return Additional resource request */ - virtual std::vector BackwardResource( - const mxnet::ShapeVector &in_shape) const { + virtual std::vector BackwardResource(const mxnet::ShapeVector& in_shape) const { return std::vector(); } /*! @@ -319,10 +322,9 @@ class OperatorProperty { * \return an integer vector indicating the input requirments * \sa BackwardInputs */ - virtual std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const { + virtual std::vector DeclareBackwardDependency(const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data) const { // By default requires to see all the things. // remember to override this function to get a better performance. std::vector ret = out_grad; @@ -352,8 +354,8 @@ class OperatorProperty { * indicating possible in place operations. */ virtual std::vector > ForwardInplaceOption( - const std::vector &in_data, - const std::vector &out_data) const { + const std::vector& in_data, + const std::vector& out_data) const { return std::vector >(); } /*! @@ -383,10 +385,10 @@ class OperatorProperty { * indicating possible in place operations. 
*/ virtual std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const { + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data, + const std::vector& in_grad) const { return std::vector >(); } /*! @@ -401,10 +403,10 @@ class OperatorProperty { * \return vector of inputs the Backward Operation depends on. * \sa DeclareBackwardDependency */ - template - inline std::vector BackwardInputs(const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const { + template + inline std::vector BackwardInputs(const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data) const { int counter = 0; std::vector out_grad_index(out_grad.size()); std::vector in_data_index(in_data.size()); @@ -423,8 +425,8 @@ class OperatorProperty { all_data.insert(all_data.end(), in_data.begin(), in_data.end()); all_data.insert(all_data.end(), out_data.begin(), out_data.end()); - std::vector ret_index = this->DeclareBackwardDependency( - out_grad_index, in_data_index, out_data_index); + std::vector ret_index = + this->DeclareBackwardDependency(out_grad_index, in_data_index, out_data_index); std::vector ret(ret_index.size()); for (size_t i = 0; i < ret_index.size(); ++i) { @@ -437,7 +439,7 @@ class OperatorProperty { * \param type_name the type string of the OperatorProperty * \return a new constructed OperatorProperty */ - static OperatorProperty *Create(const char* type_name); + static OperatorProperty* Create(const char* type_name); /*! \return execution type of the operator */ virtual ExecType exec_type() const { return ExecType::kSync; @@ -445,13 +447,12 @@ class OperatorProperty { }; /*! \brief typedef the factory function of operator property */ -typedef std::function OperatorPropertyFactory; +typedef std::function OperatorPropertyFactory; /*! * \brief Registry entry for OperatorProperty factory functions. */ struct OperatorPropertyReg - : public dmlc::FunctionRegEntryBase { + : public dmlc::FunctionRegEntryBase { /*! 
* \brief Set key_var_num_args * When this is set, the API caller is required to pass in a @@ -464,7 +465,7 @@ struct OperatorPropertyReg * * \param key the key name to be set */ - inline OperatorPropertyReg& set_key_var_num_args(const std::string &key) { // NOLINT(*) + inline OperatorPropertyReg& set_key_var_num_args(const std::string& key) { // NOLINT(*) this->key_var_num_args = key; return *this; } @@ -472,12 +473,12 @@ struct OperatorPropertyReg * \brief Check if TypeString of the type matches the registered name */ inline OperatorPropertyReg& check_name() { - OperatorProperty *p = this->body(); - std::string type = p->TypeString(); + OperatorProperty* p = this->body(); + std::string type = p->TypeString(); delete p; - CHECK_EQ(this->name, type) - << "Register Name and TypeString mismatch, name=\"" << this->name << "\"," - << " but TypeString=\"" << type <<"\""; + CHECK_EQ(this->name, type) << "Register Name and TypeString mismatch, name=\"" << this->name + << "\"," + << " but TypeString=\"" << type << "\""; return *this; } @@ -499,11 +500,11 @@ struct OperatorPropertyReg * * \endcode */ -#define MXNET_REGISTER_OP_PROPERTY(name, OperatorPropertyType) \ +#define MXNET_REGISTER_OP_PROPERTY(name, OperatorPropertyType) \ DMLC_REGISTRY_REGISTER(::mxnet::OperatorPropertyReg, OperatorPropertyReg, name) \ - .set_body([]() { return new OperatorPropertyType(); }) \ - .set_return_type("NDArray-or-Symbol") \ - .check_name() + .set_body([]() { return new OperatorPropertyType(); }) \ + .set_return_type("NDArray-or-Symbol") \ + .check_name() #endif // DMLC_USE_CXX11 } // namespace mxnet diff --git a/include/mxnet/operator_util.h b/include/mxnet/operator_util.h index 9f1ddc4570c3..c5c274ebede9 100644 --- a/include/mxnet/operator_util.h +++ b/include/mxnet/operator_util.h @@ -30,7 +30,7 @@ #define MXNET_OPERATOR_UTIL_H_ #ifdef _MSC_VER -#pragma warning(disable:4503) // disable warning: decorated name length exceeded. +#pragma warning(disable : 4503) // disable warning: decorated name length exceeded. #endif #include @@ -86,10 +86,7 @@ struct EnvArguments { * \param req The requirement to stroe the ret. * \param ctx Runtime context to execute the function. */ -typedef void (*SourceFunction)(const EnvArguments& env, - TBlob* ret, - OpReqType req, - RunContext ctx); +typedef void (*SourceFunction)(const EnvArguments& env, TBlob* ret, OpReqType req, RunContext ctx); /*! * \brief Shape inference function to get the correct shape. @@ -118,8 +115,7 @@ typedef void (*UnaryFunction)(const TBlob& src, * \param env The Environment arguments. * \return The inferred result shape. */ -typedef mxnet::TShape (*UnaryShapeFunction)(const mxnet::TShape& src, - const EnvArguments& env); +typedef mxnet::TShape (*UnaryShapeFunction)(const mxnet::TShape& src, const EnvArguments& env); /*! * \brief Gradient function that takes output value of function and computes gradient wrt to input. @@ -189,8 +185,8 @@ typedef void (*BinaryFunction)(const TBlob& lhs, * \return The inferred result shape. */ typedef mxnet::TShape (*BinaryShapeFunction)(const mxnet::TShape& lhs, - const mxnet::TShape& rhs, - const EnvArguments& env); + const mxnet::TShape& rhs, + const EnvArguments& env); /*! * \brief Gradient function that takes only output gradient and computes gradient wrt to input. * We support total gradient as a whole to make it easy to combine a few ops. @@ -246,16 +242,10 @@ enum SimpleOpInplaceOption { }; /*! 
\brief options in the registry to set symbolic registration */ -enum SimpleOpScalarOption { - kScalarBeforeArray, - kArrayBeforeScalar -}; +enum SimpleOpScalarOption { kScalarBeforeArray, kArrayBeforeScalar }; /*! \brief options in the registry to set symbolic registration */ -enum SimpleOpRegOption { - kNotRegisterSymbolic, - kRegisterSymbolic -}; +enum SimpleOpRegOption { kNotRegisterSymbolic, kRegisterSymbolic }; /*! \brief registry entry to register simple operators via functions. */ class SimpleOpRegEntry { @@ -278,9 +268,8 @@ class SimpleOpRegEntry { * \param enable_scalar whether to enable scalar argument * \param type_mask the position of the scalar argument. */ - virtual TSelf& set_enable_scalar( - bool enable_scalar, - SimpleOpScalarOption type_mask = kArrayBeforeScalar) = 0; + virtual TSelf& set_enable_scalar(bool enable_scalar, + SimpleOpScalarOption type_mask = kArrayBeforeScalar) = 0; /*! * \brief set whether to enable kwargs * A function cannot have both kwargs and scalar arguments. @@ -294,8 +283,7 @@ class SimpleOpRegEntry { * The resource will be presented in both forward and backward. * \param reqs the request. */ - virtual TSelf& set_resource_request( - const std::vector& reqs) = 0; + virtual TSelf& set_resource_request(const std::vector& reqs) = 0; /*! * \brief set resource request * By default there is no resource request. @@ -326,10 +314,9 @@ class SimpleOpRegEntry { * \param fsource The unary function that peforms the operation. * \param register_symbolic Whether register a symbolic operator as well. */ - virtual TSelf& set_function( - int dev_mask, - SourceFunction fsource, - SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; + virtual TSelf& set_function(int dev_mask, + SourceFunction fsource, + SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; /*! * \brief set function of the function to be funary * \param dev_mask The device mask of the function can act on. @@ -337,11 +324,10 @@ class SimpleOpRegEntry { * \param inplace_in_out Whether do inplace optimization on in and out. * \param register_symbolic Whether register a symbolic operator as well. */ - virtual TSelf& set_function( - int dev_mask, - UnaryFunction funary, - SimpleOpInplaceOption inplace_in_out, - SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; + virtual TSelf& set_function(int dev_mask, + UnaryFunction funary, + SimpleOpInplaceOption inplace_in_out, + SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; /*! * \brief set function of the function to be funary * \param dev_mask The device mask of the function can act on. @@ -349,11 +335,10 @@ class SimpleOpRegEntry { * \param inplace_lhs_out Whether do inplace optimization on lhs and out. * \param register_symbolic Whether register a symbolic operator as well. */ - virtual TSelf& set_function( - int dev_mask, - BinaryFunction fbinary, - SimpleOpInplaceOption inplace_lhs_out, - SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; + virtual TSelf& set_function(int dev_mask, + BinaryFunction fbinary, + SimpleOpInplaceOption inplace_lhs_out, + SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0; /*! * \brief set gradient of the function of this function. * \param dev_mask The device mask of the function can act on. @@ -404,14 +389,14 @@ class SimpleOpRegEntry { * \param description The description of the function. * \return reference to self. */ - virtual TSelf& describe(const std::string &description) = 0; + virtual TSelf& describe(const std::string& description) = 0; /*! 
* \brief Describe the function. * \param args argument information. * Add additional arguments to the function. * \return reference to self. */ - virtual TSelf& add_arguments(const std::vector &args) = 0; + virtual TSelf& add_arguments(const std::vector& args) = 0; /*! \brief virtual destructor */ virtual ~SimpleOpRegEntry() {} }; @@ -424,13 +409,13 @@ class SimpleOpRegistry { * \param name name of the function * \return ref to the registered entry, used to set properties */ - SimpleOpRegEntry &__REGISTER_OR_FIND__(char const* name); + SimpleOpRegEntry& __REGISTER_OR_FIND__(char const* name); /*! * \brief Find the entry with corresponding name. * \param name name of the function * \return the corresponding function, can be nullptr */ - inline static const SimpleOpRegEntry *Find(const std::string &name) { + inline static const SimpleOpRegEntry* Find(const std::string& name) { return Get()->fmap_.at(name); } /*! \return global singleton of the registry */ @@ -451,29 +436,28 @@ class SimpleOpRegistry { * \tparam OType output type * \tparam Exp expression type */ -#define ASSIGN_DISPATCH(out, req, exp) \ - { \ - switch (req) { \ - case kNullOp: \ - break; \ - case kWriteTo: \ - case kWriteInplace: \ - (out) = (exp); \ - break; \ - case kAddTo: \ - (out) += (exp); \ - break; \ - default: \ - LOG(FATAL) << "not reached"; \ - } \ +#define ASSIGN_DISPATCH(out, req, exp) \ + { \ + switch (req) { \ + case kNullOp: \ + break; \ + case kWriteTo: \ + case kWriteInplace: \ + (out) = (exp); \ + break; \ + case kAddTo: \ + (out) += (exp); \ + break; \ + default: \ + LOG(FATAL) << "not reached"; \ + } \ } /*! -* \brief Maximum ndim supported for special operators like broadcasting with non contiguous lhs/rhs -*/ + * \brief Maximum ndim supported for special operators like broadcasting with non contiguous lhs/rhs + */ #define MXNET_SPECIAL_MAX_NDIM 5 - //-------------------------------------------------------------- // The following part are API Registration of Simple Operators //-------------------------------------------------------------- @@ -494,9 +478,8 @@ class SimpleOpRegistry { * * \endcode */ -#define MXNET_REGISTER_SIMPLE_OP(Name, DEV) \ - static ::mxnet::op::SimpleOpRegEntry & \ - __make_ ## SimpleOpRegEntry ## _ ## Name ## __ ## DEV ##__ = \ +#define MXNET_REGISTER_SIMPLE_OP(Name, DEV) \ + static ::mxnet::op::SimpleOpRegEntry& __make_##SimpleOpRegEntry##_##Name##__##DEV##__ = \ ::mxnet::op::SimpleOpRegistry::Get()->__REGISTER_OR_FIND__(#Name) } // namespace op diff --git a/include/mxnet/random_generator.h b/include/mxnet/random_generator.h index 8a717451c23b..4d6f8c70a1c1 100644 --- a/include/mxnet/random_generator.h +++ b/include/mxnet/random_generator.h @@ -37,10 +37,10 @@ namespace mxnet { namespace common { namespace random { -template +template class RandGenerator; -template +template class RandGenerator { public: // at least how many random numbers should be generated by one CPU thread. 
@@ -52,15 +52,17 @@ class RandGenerator { // TODO(alexzai): move impl class to separate file - tracked in MXNET-948 class Impl { public: - typedef typename std::conditional::value, - DType, double>::type FType; - explicit Impl(RandGenerator *gen, int state_idx) + typedef + typename std::conditional::value, DType, double>::type FType; + explicit Impl(RandGenerator* gen, int state_idx) : engine_(gen->states_ + state_idx) {} - Impl(const Impl &) = delete; - Impl &operator=(const Impl &) = delete; + Impl(const Impl&) = delete; + Impl& operator=(const Impl&) = delete; - MSHADOW_XINLINE int rand() { return engine_->operator()(); } + MSHADOW_XINLINE int rand() { + return engine_->operator()(); + } MSHADOW_XINLINE int64_t rand_int64() { return static_cast(engine_->operator()() << 31) + engine_->operator()(); @@ -68,8 +70,8 @@ class RandGenerator { MSHADOW_XINLINE FType uniform() { typedef typename std::conditional::value, - std::uniform_int_distribution, - std::uniform_real_distribution>::type GType; + std::uniform_int_distribution, + std::uniform_real_distribution>::type GType; GType dist_uniform; return dist_uniform(*engine_); } @@ -80,19 +82,20 @@ class RandGenerator { } private: - std::mt19937 *engine_; + std::mt19937* engine_; }; // class RandGenerator::Impl - static void AllocState(RandGenerator *inst) { + static void AllocState(RandGenerator* inst) { inst->states_ = new std::mt19937[kNumRandomStates]; } - static void FreeState(RandGenerator *inst) { + static void FreeState(RandGenerator* inst) { delete[] inst->states_; } - MSHADOW_XINLINE void Seed(mshadow::Stream *, uint32_t seed) { - for (int i = 0; i < kNumRandomStates; ++i) (states_ + i)->seed(seed + i); + MSHADOW_XINLINE void Seed(mshadow::Stream*, uint32_t seed) { + for (int i = 0; i < kNumRandomStates; ++i) + (states_ + i)->seed(seed + i); } // export global random states, used by c++ custom operator @@ -101,18 +104,18 @@ class RandGenerator { } private: - std::mt19937 *states_; + std::mt19937* states_; }; // class RandGenerator -template +template const int RandGenerator::kMinNumRandomPerThread = 64; -template +template const int RandGenerator::kNumRandomStates = 1024; #if MXNET_USE_CUDA -template +template class RandGenerator { public: // at least how many random numbers should be generated by one GPU thread. @@ -127,14 +130,12 @@ class RandGenerator { // TODO(alexzai): move impl class to separate file - tracked in MXNET-948 class Impl { public: - Impl &operator=(const Impl &) = delete; - Impl(const Impl &) = delete; + Impl& operator=(const Impl&) = delete; + Impl(const Impl&) = delete; // Copy state to local memory for efficiency. 
- __device__ explicit Impl(RandGenerator *gen, int state_idx) - : global_gen_(gen), - global_state_idx_(state_idx), - state_(*(gen->states_ + state_idx)) {} + __device__ explicit Impl(RandGenerator* gen, int state_idx) + : global_gen_(gen), global_state_idx_(state_idx), state_(*(gen->states_ + state_idx)) {} __device__ ~Impl() { // store the curand state back into global memory @@ -158,25 +159,25 @@ class RandGenerator { } private: - RandGenerator *global_gen_; + RandGenerator* global_gen_; int global_state_idx_; curandStatePhilox4_32_10_t state_; }; // class RandGenerator::Impl - static void AllocState(RandGenerator *inst); + static void AllocState(RandGenerator* inst); - static void FreeState(RandGenerator *inst); + static void FreeState(RandGenerator* inst); - void Seed(mshadow::Stream *s, uint32_t seed); + void Seed(mshadow::Stream* s, uint32_t seed); // export global random states, used by c++ custom operator void* GetStates(); private: - curandStatePhilox4_32_10_t *states_; + curandStatePhilox4_32_10_t* states_; }; // class RandGenerator -template<> +template <> class RandGenerator { public: // uniform number generation in Cuda made consistent with stl (include 0 but exclude 1) @@ -186,14 +187,12 @@ class RandGenerator { // TODO(alexzai): move impl class to separate file - tracked in MXNET-948 class Impl { public: - Impl &operator=(const Impl &) = delete; - Impl(const Impl &) = delete; + Impl& operator=(const Impl&) = delete; + Impl(const Impl&) = delete; // Copy state to local memory for efficiency. - __device__ explicit Impl(RandGenerator *gen, int state_idx) - : global_gen_(gen), - global_state_idx_(state_idx), - state_(*(gen->states_ + state_idx)) {} + __device__ explicit Impl(RandGenerator* gen, int state_idx) + : global_gen_(gen), global_state_idx_(state_idx), state_(*(gen->states_ + state_idx)) {} __device__ ~Impl() { // store the curand state back into global memory @@ -217,13 +216,13 @@ class RandGenerator { } private: - RandGenerator *global_gen_; + RandGenerator* global_gen_; int global_state_idx_; curandStatePhilox4_32_10_t state_; }; // class RandGenerator::Impl private: - curandStatePhilox4_32_10_t *states_; + curandStatePhilox4_32_10_t* states_; }; // class RandGenerator #endif // MXNET_USE_CUDA diff --git a/include/mxnet/resource.h b/include/mxnet/resource.h index b98abe1c997f..b856002cb76f 100644 --- a/include/mxnet/resource.h +++ b/include/mxnet/resource.h @@ -74,15 +74,12 @@ inline std::string __extract_fname(const std::string& path) { } // anonymous namespace #if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) -#define MXNET_RESOURCE_DEFAULT_NAME_FARG(tag) \ - std::string(tag) \ - + " (" + __extract_fname(__builtin_FILE()) \ - + " +" + std::to_string(__builtin_LINE()) + ")" +#define MXNET_RESOURCE_DEFAULT_NAME_FARG(tag) \ + std::string(tag) + " (" + __extract_fname(__builtin_FILE()) + " +" + \ + std::to_string(__builtin_LINE()) + ")" #else // !__GNUC__ || __clang__ #define MXNET_RESOURCE_DEFAULT_NAME_FARG(tag) \ - std::string(tag) \ - + " (" + __extract_fname(__FILE__) \ - + " +" + std::to_string(__LINE__) + ")" + std::string(tag) + " (" + __extract_fname(__FILE__) + " +" + std::to_string(__LINE__) + ")" #endif // __GNUC__ && !__clang__ /*! @@ -101,7 +98,7 @@ struct Resource { * \brief pointer to the resource, do not use directly, * access using member functions */ - void *ptr_; + void* ptr_; /*! \brief default constructor */ Resource() : id(0) {} /*! @@ -110,12 +107,10 @@ struct Resource { * \return the mshadow random number generator requested. 
* \tparam xpu the device type of random number generator. */ - template - inline mshadow::Random* get_random( - mshadow::Stream *stream) const { + template + inline mshadow::Random* get_random(mshadow::Stream* stream) const { CHECK_EQ(req.type, ResourceRequest::kRandom); - mshadow::Random *ret = - static_cast*>(ptr_); + mshadow::Random* ret = static_cast*>(ptr_); ret->set_stream(stream); return ret; } @@ -126,7 +121,7 @@ struct Resource { * \tparam DType the return type. * \return the parallel random number generator. for gpu, it is allocated on global memory. */ - template + template inline common::random::RandGenerator* get_parallel_random() const { CHECK_EQ(req.type, ResourceRequest::kParallelRandom); return static_cast*>(ptr_); @@ -149,10 +144,11 @@ struct Resource { * \tparam xpu the device type of random number generator. * \tparam ndim the number of dimension of the tensor requested. */ - template + template inline mshadow::Tensor get_space( - mshadow::Shape shape, mshadow::Stream *stream, - const std::string &name = MXNET_RESOURCE_DEFAULT_NAME_FARG("temp_space")) const { + mshadow::Shape shape, + mshadow::Stream* stream, + const std::string& name = MXNET_RESOURCE_DEFAULT_NAME_FARG("temp_space")) const { return get_space_typed(shape, stream, name); } /*! @@ -163,9 +159,8 @@ struct Resource { * \return the mshadow tensor requested. * \tparam ndim the number of dimension of the tensor requested. */ - template - inline mshadow::Tensor get_host_space( - mshadow::Shape shape) const { + template + inline mshadow::Tensor get_host_space(mshadow::Shape shape) const { return get_host_space_typed(shape); } /*! @@ -179,15 +174,17 @@ struct Resource { * \tparam xpu the device type of random number generator. * \tparam ndim the number of dimension of the tensor requested. */ - template + template inline mshadow::Tensor get_space_typed( - mshadow::Shape shape, mshadow::Stream *stream, - const std::string &name = MXNET_RESOURCE_DEFAULT_NAME_FARG("temp_space")) const { + mshadow::Shape shape, + mshadow::Stream* stream, + const std::string& name = MXNET_RESOURCE_DEFAULT_NAME_FARG("temp_space")) const { CHECK_EQ(req.type, ResourceRequest::kTempSpace); return mshadow::Tensor( - reinterpret_cast(get_space_internal( - shape.Size() * sizeof(DType), name)), - shape, shape[ndim - 1], stream); + reinterpret_cast(get_space_internal(shape.Size() * sizeof(DType), name)), + shape, + shape[ndim - 1], + stream); } #if MXNET_USE_CUDNN == 1 /*! @@ -200,10 +197,10 @@ struct Resource { * \return the mshadow tensor requested. */ void get_cudnn_dropout_desc( - cudnnDropoutDescriptor_t *dropout_desc, - mshadow::Stream *stream, + cudnnDropoutDescriptor_t* dropout_desc, + mshadow::Stream* stream, const float dropout, - const std::string &name = MXNET_RESOURCE_DEFAULT_NAME_FARG("cudnn_dropout_state")) const; + const std::string& name = MXNET_RESOURCE_DEFAULT_NAME_FARG("cudnn_dropout_state")) const; #endif // MXNET_USE_CUDNN == 1 /*! @@ -215,12 +212,13 @@ struct Resource { * \tparam ndim the number of dimnesion of tensor requested * \tparam DType request data type */ - template - inline mshadow::Tensor get_host_space_typed( - mshadow::Shape shape) const { - return mshadow::Tensor( + template + inline mshadow::Tensor get_host_space_typed(mshadow::Shape shape) const { + return mshadow::Tensor( reinterpret_cast(get_host_space_internal(shape.Size() * sizeof(DType))), - shape, shape[ndim - 1], nullptr); + shape, + shape[ndim - 1], + nullptr); } /*! * \brief internal function to get space from resources. 
@@ -228,13 +226,13 @@ struct Resource { * \param name the Name of the operator requesting the resource. * \return The allocated space. */ - void* get_space_internal(size_t size, const std::string &name) const; + void* get_space_internal(size_t size, const std::string& name) const; /*! * \brief internal function to get cpu space from resources. * \param size The size of space. * \return The allocated space */ - void *get_host_space_internal(size_t size) const; + void* get_host_space_internal(size_t size) const; }; /*! \brief Global resource manager */ @@ -248,7 +246,7 @@ class ResourceManager { * \note The returned resource's ownership is * still hold by the manager singleton. */ - virtual Resource Request(Context ctx, const ResourceRequest &req) = 0; + virtual Resource Request(Context ctx, const ResourceRequest& req) = 0; /*! * \brief Seed all the allocated random number generators. * \param seed the seed to the random number generators on all devices. @@ -264,7 +262,7 @@ class ResourceManager { /*! * \return Resource manager singleton. */ - static ResourceManager *Get(); + static ResourceManager* Get(); }; } // namespace mxnet #endif // MXNET_RESOURCE_H_ diff --git a/include/mxnet/rtc.h b/include/mxnet/rtc.h index 56717f4a34c7..a87615143bc0 100644 --- a/include/mxnet/rtc.h +++ b/include/mxnet/rtc.h @@ -83,12 +83,19 @@ class CudaModule { class Kernel { public: /*! \brief Launch the kernel */ - void Launch(const Context& ctx, const std::vector& args, - uint32_t grid_dim_x, uint32_t grid_dim_y, uint32_t grid_dim_z, - uint32_t block_dim_x, uint32_t block_dim_y, uint32_t block_dim_z, + void Launch(const Context& ctx, + const std::vector& args, + uint32_t grid_dim_x, + uint32_t grid_dim_y, + uint32_t grid_dim_z, + uint32_t block_dim_x, + uint32_t block_dim_y, + uint32_t block_dim_z, uint32_t shared_mem); /*! \brief kernel interface signature */ - const std::vector& signature() { return signature_; } + const std::vector& signature() { + return signature_; + } private: friend class CudaModule; @@ -125,8 +132,7 @@ class CudaModule { * \param signature kernel signature * \return shared pointer to cuda kernel */ - std::shared_ptr GetKernel(const std::string& name, - const std::vector& signature); + std::shared_ptr GetKernel(const std::string& name, const std::vector& signature); }; } // namespace rtc diff --git a/include/mxnet/runtime/c_runtime_api.h b/include/mxnet/runtime/c_runtime_api.h index 6a2948225ecc..446bd40b682c 100644 --- a/include/mxnet/runtime/c_runtime_api.h +++ b/include/mxnet/runtime/c_runtime_api.h @@ -34,7 +34,6 @@ extern "C" { #include #include - /*! * \brief The type code in MXNetType * \note MXNetType is used in two places. @@ -43,25 +42,25 @@ typedef enum { // The type code of other types are compatible with DLPack. // The next few fields are extension types // that is used by MXNet API calls. - kHandle = 3U, - kNull = 4U, - kMXNetType = 5U, - kMXNetContext = 6U, - kObjectHandle = 7U, - kStr = 8U, - kBytes = 9U, - kPyArg = 10U, + kHandle = 3U, + kNull = 4U, + kMXNetType = 5U, + kMXNetContext = 6U, + kObjectHandle = 7U, + kStr = 8U, + kBytes = 9U, + kPyArg = 10U, kNDArrayHandle = 11U, // Extension codes for other frameworks to integrate MXNet PackedFunc. // To make sure each framework's id do not conflict, use first and // last sections to mark ranges. // Open an issue at the repo if you need a section of code. - kExtBegin = 15U, + kExtBegin = 15U, kNNVMFirst = 16U, - kNNVMLast = 20U, + kNNVMLast = 20U, // The following section of code is used for non-reserved types. 
kExtReserveEnd = 64U, - kExtEnd = 128U, + kExtEnd = 128U, // The rest of the space is used for custom, user-supplied datatypes kCustomBegin = 129U, } MXNetTypeCode; @@ -144,8 +143,7 @@ MXNET_DLL int MXNetFuncGetGlobal(const char* name, MXNetFunctionHandle* out); * \param out_array The array of function names. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNetFuncListGlobalNames(int* out_size, - const char*** out_array); +MXNET_DLL int MXNetFuncListGlobalNames(int* out_size, const char*** out_array); /*! * \brief Free the object. @@ -157,7 +155,6 @@ MXNET_DLL int MXNetFuncListGlobalNames(int* out_size, */ MXNET_DLL int MXNetObjectFree(MXNetObjectHandle obj); - /*! * \brief Get the type_index from an object. * diff --git a/include/mxnet/runtime/container.h b/include/mxnet/runtime/container.h index fc1d4a173669..56a0ef9d601c 100644 --- a/include/mxnet/runtime/container.h +++ b/include/mxnet/runtime/container.h @@ -105,8 +105,7 @@ class InplaceArrayBase { * \brief Destroy the Inplace Array Base object */ ~InplaceArrayBase() { - if (!(std::is_standard_layout::value && - std::is_trivial::value)) { + if (!(std::is_standard_layout::value && std::is_trivial::value)) { size_t size = Self()->GetSize(); for (size_t i = 0; i < size; ++i) { ElemType* fp = reinterpret_cast(AddressOf(i)); @@ -150,14 +149,14 @@ class InplaceArrayBase { * \return Raw pointer to the element. */ void* AddressOf(size_t idx) const { - static_assert(alignof(ArrayType) % alignof(ElemType) == 0 && - sizeof(ArrayType) % alignof(ElemType) == 0, - "The size and alignment of ArrayType should respect " - "ElemType's alignment."); + static_assert( + alignof(ArrayType) % alignof(ElemType) == 0 && sizeof(ArrayType) % alignof(ElemType) == 0, + "The size and alignment of ArrayType should respect " + "ElemType's alignment."); size_t kDataStart = sizeof(ArrayType); - ArrayType* self = Self(); - char* data_start = reinterpret_cast(self) + kDataStart; + ArrayType* self = Self(); + char* data_start = reinterpret_cast(self) + kDataStart; return data_start + idx * sizeof(ElemType); } }; @@ -171,7 +170,7 @@ class ADTObj : public Object, public InplaceArrayBase { uint32_t size{0}; // The fields of the structure follows directly in memory. - static constexpr const char* _type_key = "MXNet.ADT"; + static constexpr const char* _type_key = "MXNet.ADT"; static constexpr const uint32_t _type_index = TypeIndex::kMXNetADT; MXNET_DECLARE_FINAL_OBJECT_INFO(ADTObj, Object) @@ -179,7 +178,9 @@ class ADTObj : public Object, public InplaceArrayBase { /*! * \return The number of elements in the array. */ - size_t GetSize() const { return size; } + size_t GetSize() const { + return size; + } /*! * \brief Initialize the elements in the array. @@ -191,8 +192,8 @@ class ADTObj : public Object, public InplaceArrayBase { template void Init(Iterator begin, Iterator end) { size_t num_elems = std::distance(begin, end); - this->size = 0; - auto it = begin; + this->size = 0; + auto it = begin; for (size_t i = 0; i < num_elems; ++i) { InplaceArrayBase::EmplaceInit(i, *it++); // Only increment size after the initialization succeeds @@ -213,8 +214,7 @@ class ADT : public ObjectRef { * \param fields The fields of the ADT object. * \return The constructed ADT object reference. */ - ADT(uint32_t tag, std::vector fields) - : ADT(tag, fields.begin(), fields.end()){}; + ADT(uint32_t tag, std::vector fields) : ADT(tag, fields.begin(), fields.end()){}; /*! * \brief construct an ADT object reference. 
@@ -226,8 +226,8 @@ class ADT : public ObjectRef { template ADT(uint32_t tag, Iterator begin, Iterator end) { size_t num_elems = std::distance(begin, end); - auto ptr = make_inplace_array_object(num_elems); - ptr->tag = tag; + auto ptr = make_inplace_array_object(num_elems); + ptr->tag = tag; ptr->Init(begin, end); data_ = std::move(ptr); } @@ -238,8 +238,7 @@ class ADT : public ObjectRef { * \param init The initializer list of fields. * \return The constructed ADT object reference. */ - ADT(uint32_t tag, std::initializer_list init) - : ADT(tag, init.begin(), init.end()){}; + ADT(uint32_t tag, std::initializer_list init) : ADT(tag, init.begin(), init.end()){}; /*! * \brief Access element at index. @@ -254,12 +253,16 @@ class ADT : public ObjectRef { /*! * \brief Return the ADT tag. */ - size_t tag() const { return operator->()->tag; } + size_t tag() const { + return operator->()->tag; + } /*! * \brief Return the number of fields. */ - size_t size() const { return operator->()->size; } + size_t size() const { + return operator->()->size; + } /*! * \brief Construct a tuple object. diff --git a/include/mxnet/runtime/container_ext.h b/include/mxnet/runtime/container_ext.h index acbc02af8fe5..d9f513151fd8 100644 --- a/include/mxnet/runtime/container_ext.h +++ b/include/mxnet/runtime/container_ext.h @@ -83,67 +83,93 @@ class MapObj : public Object { static_assert(sizeof(KVType) == 16 || sizeof(KVType) == 8, "sizeof(KVType) incorrect"); static constexpr const uint32_t _type_index = runtime::TypeIndex::kMXNetMap; - static constexpr const char* _type_key = "MXNet.Map"; + static constexpr const char* _type_key = "MXNet.Map"; MXNET_DECLARE_FINAL_OBJECT_INFO(MapObj, Object); /*! * \brief Number of elements in the MapObj * \return The result */ - size_t size() const { return data_.size(); } + size_t size() const { + return data_.size(); + } /*! * \brief Count the number of times a key exists in the hash map * \param key The indexing key * \return The result, 0 or 1 */ - size_t count(const key_type& key) const { return data_.count(key); } + size_t count(const key_type& key) const { + return data_.count(key); + } /*! * \brief Index value associated with a key, throw exception if the key does not exist * \param key The indexing key * \return The const reference to the value */ - const mapped_type& at(const key_type& key) const { return data_.at(key); } + const mapped_type& at(const key_type& key) const { + return data_.at(key); + } /*! * \brief Index value associated with a key, throw exception if the key does not exist * \param key The indexing key * \return The mutable reference to the value */ - mapped_type& at(const key_type& key) { return data_.at(key); } + mapped_type& at(const key_type& key) { + return data_.at(key); + } /*! \return begin iterator */ - iterator begin() { return data_.begin(); } + iterator begin() { + return data_.begin(); + } /*! \return const begin iterator */ - const_iterator begin() const { return data_.begin(); } + const_iterator begin() const { + return data_.begin(); + } /*! \return end iterator */ - iterator end() { return data_.end(); } + iterator end() { + return data_.end(); + } /*! \return end iterator */ - const_iterator end() const { return data_.end(); } + const_iterator end() const { + return data_.end(); + } /*! 
* \brief Index value associated with a key * \param key The indexing key * \return The iterator of the entry associated with the key, end iterator if not exists */ - const_iterator find(const key_type& key) const { return data_.find(key); } + const_iterator find(const key_type& key) const { + return data_.find(key); + } /*! * \brief Index value associated with a key * \param key The indexing key * \return The iterator of the entry associated with the key, end iterator if not exists */ - iterator find(const key_type& key) { return data_.find(key); } + iterator find(const key_type& key) { + return data_.find(key); + } /*! * \brief Erase the entry associated with the iterator * \param position The iterator */ - void erase(const iterator& position) { data_.erase(position); } + void erase(const iterator& position) { + data_.erase(position); + } /*! * \brief Erase the entry associated with the key, do nothing if not exists * \param key The indexing key */ - void erase(const key_type& key) { data_.erase(key); } + void erase(const key_type& key) { + data_.erase(key); + } /*! * \brief Create an empty container * \return The object created */ - static ObjectPtr Empty() { return make_object(); } + static ObjectPtr Empty() { + return make_object(); + } protected: /*! @@ -156,7 +182,7 @@ template static ObjectPtr CreateFromRange(IterType first, IterType last) { ObjectPtr p = make_object(); - p->data_ = ContainerType(first, last); + p->data_ = ContainerType(first, last); return p; } /*! @@ -165,7 +191,7 @@ * \param map The pointer to the map, can be changed if re-hashing happens */ static void InsertMaybeReHash(const KVType& kv, ObjectPtr* map) { - MapObj* map_node = static_cast(map->get()); + MapObj* map_node = static_cast(map->get()); map_node->data_[kv.first] = kv.second; } /*! @@ -175,7 +201,7 @@ */ static ObjectPtr CopyFrom(MapObj* from) { ObjectPtr p = make_object(); - p->data_ = ContainerType(from->data_.begin(), from->data_.end()); + p->data_ = ContainerType(from->data_.begin(), from->data_.end()); return p; } /*! \brief The real container storing data */ @@ -193,23 +219,28 @@ * \tparam K The key NodeRef type. * \tparam V The value NodeRef type. */ -template ::value>::type, typename = typename std::enable_if::value>::type> class Map : public ObjectRef { public: - using key_type = K; + using key_type = K; using mapped_type = V; class iterator; /*! * \brief default constructor */ - Map() { data_ = MapObj::Empty(); } + Map() { + data_ = MapObj::Empty(); + } /*! * \brief move constructor * \param other source */ - Map(Map&& other) { data_ = std::move(other.data_); } + Map(Map&& other) { + data_ = std::move(other.data_); + } /*! * \brief copy constructor * \param other source @@ -268,13 +299,17 @@ class Map : public ObjectRef { * \param key The key * \return the corresponding element. */ - const V at(const K& key) const { return DowncastNoCheck(GetMapObj()->at(key)); } + const V at(const K& key) const { + return DowncastNoCheck(GetMapObj()->at(key)); + } /*! * \brief Read element from map. * \param key The key * \return the corresponding element. */ - const V operator[](const K& key) const { return this->at(key); } + const V operator[](const K& key) const { + return this->at(key); + } /*! \return The size of the array */ size_t size() const { MapObj* n = GetMapObj(); @@ -286,7 +321,9 @@ return n == nullptr ? 0 : GetMapObj()->count(key); } /*! 
\return whether array is empty */ - bool empty() const { return size() == 0; } + bool empty() const { + return size() == 0; + } /*! * \brief set the Map. * \param key The index key. @@ -297,13 +334,21 @@ class Map : public ObjectRef { MapObj::InsertMaybeReHash(MapObj::KVType(key, value), &data_); } /*! \return begin iterator */ - iterator begin() const { return iterator(GetMapObj()->begin()); } + iterator begin() const { + return iterator(GetMapObj()->begin()); + } /*! \return end iterator */ - iterator end() const { return iterator(GetMapObj()->end()); } + iterator end() const { + return iterator(GetMapObj()->end()); + } /*! \return find the key and returns the associated iterator */ - iterator find(const K& key) const { return iterator(GetMapObj()->find(key)); } + iterator find(const K& key) const { + return iterator(GetMapObj()->find(key)); + } - void erase(const K& key) { CopyOnWrite()->erase(key); } + void erase(const K& key) { + CopyOnWrite()->erase(key); + } /*! * \brief copy on write semantics @@ -328,17 +373,21 @@ class Map : public ObjectRef { class iterator { public: using iterator_category = std::bidirectional_iterator_tag; - using difference_type = int64_t; - using value_type = const std::pair; - using pointer = value_type*; - using reference = value_type; + using difference_type = int64_t; + using value_type = const std::pair; + using pointer = value_type*; + using reference = value_type; iterator() : itr() {} /*! \brief Compare iterators */ - bool operator==(const iterator& other) const { return itr == other.itr; } + bool operator==(const iterator& other) const { + return itr == other.itr; + } /*! \brief Compare iterators */ - bool operator!=(const iterator& other) const { return itr != other.itr; } + bool operator!=(const iterator& other) const { + return itr != other.itr; + } /*! \brief De-referencing iterators is not allowed */ pointer operator->() const = delete; /*! \brief De-reference iterators */ @@ -370,7 +419,9 @@ class Map : public ObjectRef { private: /*! \brief Return data_ as type of pointer of MapObj */ - MapObj* GetMapObj() const { return static_cast(data_.get()); } + MapObj* GetMapObj() const { + return static_cast(data_.get()); + } }; /*! * \brief Merge two Maps. * \param lhs the first Map to merge. * \param rhs the second Map to merge. * \return The merged Map. Original Maps are kept unchanged. */ -template ::value>::type, typename = typename std::enable_if::value>::type> inline Map Merge(Map lhs, const Map& rhs) { @@ -399,7 +451,7 @@ class StringObj : public Object { uint64_t size; static constexpr const uint32_t _type_index = TypeIndex::kMXNetString; - static constexpr const char* _type_key = "MXNet.String"; + static constexpr const char* _type_key = "MXNet.String"; MXNET_DECLARE_FINAL_OBJECT_INFO(StringObj, Object); private: @@ -515,7 +567,9 @@ class String : public ObjectRef { * * \return const char* */ - const char* c_str() const { return get()->data; } + const char* c_str() const { + return get()->data; + } /*! * \brief Return the length of the string * @@ -532,33 +586,41 @@ * * \return size_t string length */ - size_t length() const { return size(); } + size_t length() const { + return size(); + } /*! * \brief Return if the string is empty * * \return true if empty, false otherwise. */ - bool empty() const { return size() == 0; } + bool empty() const { + return size() == 0; + } /*! 
* \brief Return the data pointer * * \return const char* data pointer */ - const char* data() const { return get()->data; } + const char* data() const { + return get()->data; + } /*! * \brief Convert String to an std::string object * * \return std::string */ - operator std::string() const { return std::string{get()->data, size()}; } + operator std::string() const { + return std::string{get()->data, size()}; + } /*! - * \brief Check if a MXNetArgValue can be converted to String, i.e. it can be std::string or String - * \param val The value to be checked - * \return A boolean indicating if val can be converted to String + * \brief Check if a MXNetArgValue can be converted to String, i.e. it can be std::string or + * String \param val The value to be checked \return A boolean indicating if val can be converted + * to String */ inline static bool CanConvertFrom(const MXNetArgValue& val); @@ -636,10 +698,10 @@ class StringObj::FromStd : public StringObj { }; inline String::String(std::string other) { - auto ptr = make_object(std::move(other)); + auto ptr = make_object(std::move(other)); ptr->size = ptr->data_container.size(); ptr->data = ptr->data_container.data(); - data_ = std::move(ptr); + data_ = std::move(ptr); } inline String& String::operator=(std::string other) { @@ -648,7 +710,9 @@ inline String& String::operator=(std::string other) { return *this; } -inline String& String::operator=(const char* other) { return operator=(std::string(other)); } +inline String& String::operator=(const char* other) { + return operator=(std::string(other)); +} inline String operator+(const String& lhs, const String& rhs) { size_t lhs_size = lhs.size(); @@ -681,70 +745,130 @@ inline String operator+(const String& lhs, const char* rhs) { } // Overload < operator -inline bool operator<(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) < 0; } +inline bool operator<(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) < 0; +} -inline bool operator<(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) > 0; } +inline bool operator<(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) > 0; +} -inline bool operator<(const String& lhs, const String& rhs) { return lhs.compare(rhs) < 0; } +inline bool operator<(const String& lhs, const String& rhs) { + return lhs.compare(rhs) < 0; +} -inline bool operator<(const String& lhs, const char* rhs) { return lhs.compare(rhs) < 0; } +inline bool operator<(const String& lhs, const char* rhs) { + return lhs.compare(rhs) < 0; +} -inline bool operator<(const char* lhs, const String& rhs) { return rhs.compare(lhs) > 0; } +inline bool operator<(const char* lhs, const String& rhs) { + return rhs.compare(lhs) > 0; +} // Overload > operator -inline bool operator>(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) > 0; } +inline bool operator>(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) > 0; +} -inline bool operator>(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) < 0; } +inline bool operator>(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) < 0; +} -inline bool operator>(const String& lhs, const String& rhs) { return lhs.compare(rhs) > 0; } +inline bool operator>(const String& lhs, const String& rhs) { + return lhs.compare(rhs) > 0; +} -inline bool operator>(const String& lhs, const char* rhs) { return lhs.compare(rhs) > 0; } +inline bool operator>(const String& lhs, const char* rhs) { + return lhs.compare(rhs) > 0; +} -inline 
bool operator>(const char* lhs, const String& rhs) { return rhs.compare(lhs) < 0; } +inline bool operator>(const char* lhs, const String& rhs) { + return rhs.compare(lhs) < 0; +} // Overload <= operator -inline bool operator<=(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) <= 0; } +inline bool operator<=(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) <= 0; +} -inline bool operator<=(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) >= 0; } +inline bool operator<=(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) >= 0; +} -inline bool operator<=(const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; } +inline bool operator<=(const String& lhs, const String& rhs) { + return lhs.compare(rhs) <= 0; +} -inline bool operator<=(const String& lhs, const char* rhs) { return lhs.compare(rhs) <= 0; } +inline bool operator<=(const String& lhs, const char* rhs) { + return lhs.compare(rhs) <= 0; +} -inline bool operator<=(const char* lhs, const String& rhs) { return rhs.compare(lhs) >= 0; } +inline bool operator<=(const char* lhs, const String& rhs) { + return rhs.compare(lhs) >= 0; +} // Overload >= operator -inline bool operator>=(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) >= 0; } +inline bool operator>=(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) >= 0; +} -inline bool operator>=(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) <= 0; } +inline bool operator>=(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) <= 0; +} -inline bool operator>=(const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; } +inline bool operator>=(const String& lhs, const String& rhs) { + return lhs.compare(rhs) >= 0; +} -inline bool operator>=(const String& lhs, const char* rhs) { return lhs.compare(rhs) >= 0; } +inline bool operator>=(const String& lhs, const char* rhs) { + return lhs.compare(rhs) >= 0; +} -inline bool operator>=(const char* lhs, const String& rhs) { return rhs.compare(rhs) <= 0; } +inline bool operator>=(const char* lhs, const String& rhs) { + return rhs.compare(lhs) <= 0; +} // Overload == operator -inline bool operator==(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) == 0; } +inline bool operator==(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) == 0; +} -inline bool operator==(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) == 0; } +inline bool operator==(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) == 0; +} -inline bool operator==(const String& lhs, const String& rhs) { return lhs.compare(rhs) == 0; } +inline bool operator==(const String& lhs, const String& rhs) { + return lhs.compare(rhs) == 0; +} -inline bool operator==(const String& lhs, const char* rhs) { return lhs.compare(rhs) == 0; } +inline bool operator==(const String& lhs, const char* rhs) { + return lhs.compare(rhs) == 0; +} -inline bool operator==(const char* lhs, const String& rhs) { return rhs.compare(lhs) == 0; } +inline bool operator==(const char* lhs, const String& rhs) { + return rhs.compare(lhs) == 0; +} // Overload != operator -inline bool operator!=(const String& lhs, const std::string& rhs) { return lhs.compare(rhs) != 0; } +inline bool operator!=(const String& lhs, const std::string& rhs) { + return lhs.compare(rhs) != 0; +} -inline bool operator!=(const std::string& lhs, const String& rhs) { return rhs.compare(lhs) != 0; } +inline bool 
operator!=(const std::string& lhs, const String& rhs) { + return rhs.compare(lhs) != 0; +} -inline bool operator!=(const String& lhs, const String& rhs) { return lhs.compare(rhs) != 0; } +inline bool operator!=(const String& lhs, const String& rhs) { + return lhs.compare(rhs) != 0; +} -inline bool operator!=(const String& lhs, const char* rhs) { return lhs.compare(rhs) != 0; } +inline bool operator!=(const String& lhs, const char* rhs) { + return lhs.compare(rhs) != 0; +} -inline bool operator!=(const char* lhs, const String& rhs) { return rhs.compare(lhs) != 0; } +inline bool operator!=(const char* lhs, const String& rhs) { + return rhs.compare(lhs) != 0; +} inline std::ostream& operator<<(std::ostream& out, const String& input) { out.write(input.data(), input.size()); @@ -752,11 +876,14 @@ inline std::ostream& operator<<(std::ostream& out, const String& input) { } inline int String::memncmp(const char* lhs, const char* rhs, size_t lhs_count, size_t rhs_count) { - if (lhs == rhs && lhs_count == rhs_count) return 0; + if (lhs == rhs && lhs_count == rhs_count) + return 0; for (size_t i = 0; i < lhs_count && i < rhs_count; ++i) { - if (lhs[i] < rhs[i]) return -1; - if (lhs[i] > rhs[i]) return 1; + if (lhs[i] < rhs[i]) + return -1; + if (lhs[i] > rhs[i]) + return 1; } if (lhs_count < rhs_count) { return -1; diff --git a/include/mxnet/runtime/data_type.h b/include/mxnet/runtime/data_type.h index 01d776322e68..78c41bead76d 100644 --- a/include/mxnet/runtime/data_type.h +++ b/include/mxnet/runtime/data_type.h @@ -29,7 +29,6 @@ #include #include - namespace mxnet { namespace runtime { /*! @@ -42,9 +41,9 @@ class MXNetDataType { public: /*! \brief Type code for the MXNetDataType. */ enum TypeCode { - kInt = kDLInt, - kUInt = kDLUInt, - kFloat = kDLFloat, + kInt = kDLInt, + kUInt = kDLUInt, + kFloat = kDLFloat, kHandle = MXNetTypeCode::kHandle, }; /*! \brief default constructor */ @@ -53,8 +52,7 @@ class MXNetDataType { * \brief Constructor * \param dtype The DLDataType */ - explicit MXNetDataType(DLDataType dtype) - : data_(dtype) {} + explicit MXNetDataType(DLDataType dtype) : data_(dtype) {} /*! * \brief Constructor * \param code The type code. @@ -62,8 +60,8 @@ class MXNetDataType { * \param lanes The number of lanes. */ MXNetDataType(int code, int bits, int lanes) { - data_.code = static_cast(code); - data_.bits = static_cast(bits); + data_.code = static_cast(code); + data_.bits = static_cast(bits); data_.lanes = static_cast(lanes); } /*! \return The type code. */ @@ -139,10 +137,8 @@ class MXNetDataType { * \return The comparison resilt. */ bool operator==(const MXNetDataType& other) const { - return - data_.code == other.data_.code && - data_.bits == other.data_.bits && - data_.lanes == other.data_.lanes; + return data_.code == other.data_.code && data_.bits == other.data_.bits && + data_.lanes == other.data_.lanes; } /*! * \brief NotEqual comparator. @@ -156,7 +152,7 @@ class MXNetDataType { * \brief Converter to DLDataType * \return the result. 
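 * For example (illustrative, inferred from the three-argument constructor
 * above), a value built with MXNetDataType(kDLFloat, 32, 1) converts to a
 * DLDataType whose code is kDLFloat, bits is 32 and lanes is 1.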
*/ - operator DLDataType () const { + operator DLDataType() const { return data_; } diff --git a/include/mxnet/runtime/ffi_helper.h b/include/mxnet/runtime/ffi_helper.h index cfc79a6c4f47..83896dd8bbe2 100644 --- a/include/mxnet/runtime/ffi_helper.h +++ b/include/mxnet/runtime/ffi_helper.h @@ -37,7 +37,7 @@ namespace runtime { class EllipsisObj : public Object { public: static constexpr const uint32_t _type_index = TypeIndex::kEllipsis; - static constexpr const char* _type_key = "MXNet.Ellipsis"; + static constexpr const char* _type_key = "MXNet.Ellipsis"; MXNET_DECLARE_FINAL_OBJECT_INFO(EllipsisObj, Object) }; @@ -53,23 +53,23 @@ class SliceObj : public Object { int64_t step; static constexpr const uint32_t _type_index = TypeIndex::kSlice; - static constexpr const char* _type_key = "MXNet.Slice"; + static constexpr const char* _type_key = "MXNet.Slice"; MXNET_DECLARE_FINAL_OBJECT_INFO(SliceObj, Object) }; class Slice : public ObjectRef { public: - explicit inline Slice(int64_t start, int64_t stop, int64_t step, + explicit inline Slice(int64_t start, + int64_t stop, + int64_t step, ObjectPtr&& data = make_object()) { data->start = start; - data->stop = stop; - data->step = step; - data_ = std::move(data); + data->stop = stop; + data->step = step; + data_ = std::move(data); } - explicit inline Slice(int64_t stop) - : Slice(kNoneValue, stop, kNoneValue) { - } + explicit inline Slice(int64_t stop) : Slice(kNoneValue, stop, kNoneValue) {} // constant to represent None. static constexpr int64_t kNoneValue = std::numeric_limits::min(); @@ -81,38 +81,36 @@ int64_t inline SliceNoneValue() { return Slice::kNoneValue; } -class IntegerObj: public Object { +class IntegerObj : public Object { public: int64_t value; static constexpr const uint32_t _type_index = TypeIndex::kInteger; - static constexpr const char* _type_key = "MXNet.Integer"; + static constexpr const char* _type_key = "MXNet.Integer"; MXNET_DECLARE_FINAL_OBJECT_INFO(IntegerObj, Object) }; -class Integer: public ObjectRef { +class Integer : public ObjectRef { public: - explicit Integer(int64_t value, - ObjectPtr&& data = make_object()) { + explicit Integer(int64_t value, ObjectPtr&& data = make_object()) { data->value = value; - data_ = std::move(data); + data_ = std::move(data); } MXNET_DEFINE_OBJECT_REF_METHODS(Integer, ObjectRef, IntegerObj) }; -class FloatObj: public Object { +class FloatObj : public Object { public: double value; static constexpr const uint32_t _type_index = TypeIndex::kFloat; - static constexpr const char* _type_key = "MXNet.Float"; + static constexpr const char* _type_key = "MXNet.Float"; MXNET_DECLARE_FINAL_OBJECT_INFO(FloatObj, Object) }; -class Float: public ObjectRef { +class Float : public ObjectRef { public: - explicit Float(double value, - ObjectPtr&& data = make_object()) { + explicit Float(double value, ObjectPtr&& data = make_object()) { data->value = value; - data_ = std::move(data); + data_ = std::move(data); } MXNET_DEFINE_OBJECT_REF_METHODS(Float, ObjectRef, FloatObj) }; diff --git a/include/mxnet/runtime/memory.h b/include/mxnet/runtime/memory.h index ea4b5a409d1e..057c7c3d3689 100644 --- a/include/mxnet/runtime/memory.h +++ b/include/mxnet/runtime/memory.h @@ -37,7 +37,7 @@ namespace runtime { * \tparam T the node type. * \return The ObjectPtr to the allocated object. */ -template +template inline ObjectPtr make_object(Args&&... args); // Detail implementations after this @@ -56,7 +56,7 @@ inline ObjectPtr make_object(Args&&... args); * * \tparam Derived The derived class. 
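 * (CRTP note: Derived is expected to supply the Handler and ArrayHandler
 * policy types that make_object and make_inplace_array below dispatch
 * through.)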
*/ -template +template class ObjAllocatorBase { public: /*! @@ -65,15 +65,13 @@ class ObjAllocatorBase { * \tparam Args The constructor signature. * \param args The arguments. */ - template + template inline ObjectPtr make_object(Args&&... args) { using Handler = typename Derived::template Handler; - static_assert(std::is_base_of::value, - "make can only be used to create Object"); - T* ptr = Handler::New(static_cast(this), - std::forward(args)...); + static_assert(std::is_base_of::value, "make can only be used to create Object"); + T* ptr = Handler::New(static_cast(this), std::forward(args)...); ptr->type_index_ = T::RuntimeTypeIndex(); - ptr->deleter_ = Handler::Deleter(); + ptr->deleter_ = Handler::Deleter(); return ObjectPtr(ptr); } @@ -84,30 +82,28 @@ class ObjAllocatorBase { * \param num_elems The number of array elements. * \param args The arguments. */ - template + template inline ObjectPtr make_inplace_array(size_t num_elems, Args&&... args) { using Handler = typename Derived::template ArrayHandler; static_assert(std::is_base_of::value, "make_inplace_array can only be used to create Object"); - ArrayType* ptr = Handler::New(static_cast(this), - num_elems, - std::forward(args)...); + ArrayType* ptr = + Handler::New(static_cast(this), num_elems, std::forward(args)...); ptr->type_index_ = ArrayType::RuntimeTypeIndex(); - ptr->deleter_ = Handler::Deleter(); + ptr->deleter_ = Handler::Deleter(); return ObjectPtr(ptr); } }; // Simple allocator that uses new/delete. -class SimpleObjAllocator : - public ObjAllocatorBase { +class SimpleObjAllocator : public ObjAllocatorBase { public: - template + template class Handler { public: using StorageType = typename std::aligned_storage::type; - template + template static T* New(SimpleObjAllocator*, Args&&... args) { // NOTE: the first argument is not needed for SimpleObjAllocator // It is reserved for special allocators that needs to recycle @@ -147,16 +143,16 @@ class SimpleObjAllocator : }; // Array handler that uses new/delete. - template + template class ArrayHandler { public: using StorageType = typename std::aligned_storage::type; // for now only support elements that aligns with array header. static_assert(alignof(ArrayType) % alignof(ElemType) == 0 && - sizeof(ArrayType) % alignof(ElemType) == 0, + sizeof(ArrayType) % alignof(ElemType) == 0, "element alignment constraint"); - template + template static ArrayType* New(SimpleObjAllocator*, size_t num_elems, Args&&... args) { // NOTE: the first argument is not needed for ArrayObjAllocator // It is reserved for special allocators that needs to recycle @@ -170,10 +166,10 @@ class SimpleObjAllocator : // class with non-virtual destructor. // We are fine here as we captured the right deleter during construction. // This is also the right way to get storage type for an object pool. - size_t unit = sizeof(StorageType); - size_t requested_size = num_elems * sizeof(ElemType) + sizeof(ArrayType); + size_t unit = sizeof(StorageType); + size_t requested_size = num_elems * sizeof(ElemType) + sizeof(ArrayType); size_t num_storage_slots = (requested_size + unit - 1) / unit; - StorageType* data = new StorageType[num_storage_slots]; + StorageType* data = new StorageType[num_storage_slots]; new (data) ArrayType(std::forward(args)...); return reinterpret_cast(data); } @@ -194,20 +190,20 @@ class SimpleObjAllocator : // call a virtual destructor(which may not be available and is not required). 
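      // The storage was obtained with placement new over a StorageType buffer
      // in ArrayHandler::New, so the explicit destructor call plus the
      // delete[] below releases exactly that allocation.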
tptr->ArrayType::~ArrayType(); StorageType* p = reinterpret_cast(tptr); - delete []p; + delete[] p; } }; }; -template +template inline ObjectPtr make_object(Args&&... args) { return SimpleObjAllocator().make_object(std::forward(args)...); } -template +template inline ObjectPtr make_inplace_array_object(size_t num_elems, Args&&... args) { - return SimpleObjAllocator().make_inplace_array( - num_elems, std::forward(args)...); + return SimpleObjAllocator().make_inplace_array(num_elems, + std::forward(args)...); } } // namespace runtime diff --git a/include/mxnet/runtime/ndarray.h b/include/mxnet/runtime/ndarray.h index 317c3239092d..666fc12a6787 100644 --- a/include/mxnet/runtime/ndarray.h +++ b/include/mxnet/runtime/ndarray.h @@ -34,7 +34,7 @@ namespace runtime { * For TVM NDArray itself, code = 0. * All subclasses of NDArray should override code > 0. */ -template +template struct array_type_info { /*! \brief the value of the traits */ static const int code = -1; diff --git a/include/mxnet/runtime/ndarray_handle.h b/include/mxnet/runtime/ndarray_handle.h index 22ebc2c09048..d8d2819b966f 100644 --- a/include/mxnet/runtime/ndarray_handle.h +++ b/include/mxnet/runtime/ndarray_handle.h @@ -41,8 +41,8 @@ class NDArrayHandle : public ObjectRef { public: explicit NDArrayHandle(NDArray* value) { runtime::ObjectPtr node = make_object(); - node->value = *value; - data_ = std::move(node); + node->value = *value; + data_ = std::move(node); } inline NDArray* getArray() const { return static_cast(&(static_cast(data_.get())->value)); diff --git a/include/mxnet/runtime/object.h b/include/mxnet/runtime/object.h index 0b679c7fefd8..d8ec1ee2fd50 100644 --- a/include/mxnet/runtime/object.h +++ b/include/mxnet/runtime/object.h @@ -48,18 +48,18 @@ namespace mxnet { namespace runtime { /*! \brief list of the type index. */ -enum TypeIndex { +enum TypeIndex { /*! \brief Root object type. */ - kRoot = 0, - kMXNetTensor = 1, + kRoot = 0, + kMXNetTensor = 1, kMXNetClosure = 2, - kMXNetADT = 3, - kMXNetMap = 4, - kMXNetString = 5, - kEllipsis = 6, - kSlice = 7, - kInteger = 8, - kFloat = 9, + kMXNetADT = 3, + kMXNetMap = 4, + kMXNetString = 5, + kEllipsis = 6, + kSlice = 7, + kInteger = 8, + kFloat = 9, kStaticIndexEnd, /*! \brief Type index is allocated during runtime. */ kDynamic = kStaticIndexEnd @@ -93,8 +93,8 @@ enum TypeIndex { * Recommendation: set to estimate number of children needed. * - _type_child_slots_can_overflow: * Whether we can add additional child classes even if the number of child classes - * exceeds the _type_child_slots. A fallback mechanism to check global type table will be used. - * Recommendation: set to false for optimal runtime speed if we know exact number of children. + * exceeds the _type_child_slots. A fallback mechanism to check global type table will be + * used. Recommendation: set to false for optimal runtime speed if we know exact number of children. * * Two macros are used to declare helper functions in the object: * - Use MXNET_DECLARE_BASE_OBJECT_INFO for object classes that can be sub-classed. @@ -177,7 +177,7 @@ class Object { * \tparam TargetType The target type to be checked. * \return Whether the target type is true. */ - template + template inline bool IsInstance() const; /*! 
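To keep the object-system hunks above and below reviewable, here is an illustrative sketch (not part of the patch) of how a final object type plugs into this machinery. MyObj, its payload field, and ObjectSketch are invented names, and the template argument lists, which the reflowed context lines drop, are reconstructed assumptions:

    #include <mxnet/runtime/memory.h>
    #include <mxnet/runtime/object.h>

    using namespace mxnet::runtime;

    // A final object type with a dynamically allocated type index; it inherits
    // _type_index = TypeIndex::kDynamic from Object, per the note above about
    // the default value for sub-classes.
    class MyObj : public Object {
     public:
      int payload{0};

      static constexpr const char* _type_key = "MXNet.MyObj";
      MXNET_DECLARE_FINAL_OBJECT_INFO(MyObj, Object)
    };
    // In a .cc file one would also add: MXNET_REGISTER_OBJECT_TYPE(MyObj);

    void ObjectSketch() {
      // SimpleObjAllocator news the storage; make_object then stamps in
      // type_index_ and deleter_ before wrapping the pointer in an ObjectPtr.
      ObjectPtr<MyObj> p = make_object<MyObj>();
      p->payload = 42;
      bool ok = p->IsInstance<MyObj>();  // resolved via the runtime type index
      (void)ok;
    }

The deleter_ captured at allocation time is what later lets reference-count release destroy the object without requiring a virtual destructor.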
@@ -215,8 +215,8 @@ class Object { } // Default object type properties for sub-classes - static constexpr bool _type_final = false; - static constexpr uint32_t _type_child_slots = 0; + static constexpr bool _type_final = false; + static constexpr uint32_t _type_child_slots = 0; static constexpr bool _type_child_slots_can_overflow = true; // NOTE: the following field is not type index of Object // but was intended to be used by sub-classes as default value. @@ -234,10 +234,10 @@ class Object { } Object(Object&& other) { // NOLINT(*) } - Object& operator=(const Object& other) { //NOLINT(*) + Object& operator=(const Object& other) { // NOLINT(*) return *this; } - Object& operator=(Object&& other) { //NOLINT(*) + Object& operator=(Object&& other) { // NOLINT(*) return *this; } @@ -255,7 +255,7 @@ class Object { FDeleter deleter_ = nullptr; // Invariant checks. static_assert(sizeof(int32_t) == sizeof(RefCounterType) && - alignof(int32_t) == sizeof(RefCounterType), + alignof(int32_t) == sizeof(RefCounterType), "RefCounter ABI check."); /*! @@ -275,12 +275,11 @@ class Object { * \param type_child_slots_can_overflow Whether to allow child to overflow the slots. * \return The allocated type index. */ - MXNET_DLL static uint32_t GetOrAllocRuntimeTypeIndex( - const std::string& key, - uint32_t static_tindex, - uint32_t parent_tindex, - uint32_t type_child_slots, - bool type_child_slots_can_overflow); + MXNET_DLL static uint32_t GetOrAllocRuntimeTypeIndex(const std::string& key, + uint32_t static_tindex, + uint32_t parent_tindex, + uint32_t type_child_slots, + bool type_child_slots_can_overflow); // reference counter related operations /*! \brief developer function, increases reference counter. */ @@ -304,9 +303,9 @@ class Object { */ MXNET_DLL bool DerivedFrom(uint32_t parent_tindex) const; // friend classes - template + template friend class ObjAllocatorBase; - template + template friend class ObjectPtr; friend class MXNetRetValue; friend class ObjectInternal; @@ -483,9 +482,9 @@ class ObjectPtr { friend class Object; friend class ObjectRef; friend struct ObjectHash; - template + template friend class ObjectPtr; - template + template friend class ObjAllocatorBase; friend class MXNetPODValue_; friend class MXNetArgsSetter; @@ -584,7 +583,7 @@ class ObjectRef { * \tparam T The target reference type. * \return The casted result. */ - template + template static T DowncastNoCheck(ObjectRef ref) { return T(std::move(ref.data_)); } @@ -594,7 +593,7 @@ class ObjectRef { * \tparam ObjectType The corresponding object type. * \return the corresponding type. */ - template + template static ObjectPtr GetDataPtr(const ObjectRef& ref) { return ObjectPtr(ref.data_.data_); } @@ -623,56 +622,53 @@ struct ObjectHash { return operator()(a.data_); } - template + template size_t operator()(const ObjectPtr& a) const { return std::hash()(a.get()); } }; - /*! \brief ObjectRef equal functor */ struct ObjectEqual { bool operator()(const ObjectRef& a, const ObjectRef& b) const { return a.same_as(b); } - template + template size_t operator()(const ObjectPtr& a, const ObjectPtr& b) const { return a == b; } }; - /*! * \brief helper macro to declare a base object type that can be inheritated. * \param TypeName The name of the current type. * \param ParentType The name of the ParentType */ -#define MXNET_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType) \ - static uint32_t RuntimeTypeIndex() { \ - return TypeName::_type_index != ::mxnet::runtime::TypeIndex::kDynamic ? 
\ - TypeName::_type_index : _GetOrAllocRuntimeTypeIndex(); \ - } \ - static uint32_t _GetOrAllocRuntimeTypeIndex() { \ - static uint32_t tidx = GetOrAllocRuntimeTypeIndex( \ - TypeName::_type_key, \ - TypeName::_type_index, \ - ParentType::_GetOrAllocRuntimeTypeIndex(), \ - TypeName::_type_child_slots, \ - TypeName::_type_child_slots_can_overflow); \ - return tidx; \ +#define MXNET_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType) \ + static uint32_t RuntimeTypeIndex() { \ + return TypeName::_type_index != ::mxnet::runtime::TypeIndex::kDynamic ? \ + TypeName::_type_index : \ + _GetOrAllocRuntimeTypeIndex(); \ + } \ + static uint32_t _GetOrAllocRuntimeTypeIndex() { \ + static uint32_t tidx = GetOrAllocRuntimeTypeIndex(TypeName::_type_key, \ + TypeName::_type_index, \ + ParentType::_GetOrAllocRuntimeTypeIndex(), \ + TypeName::_type_child_slots, \ + TypeName::_type_child_slots_can_overflow); \ + return tidx; \ } /*! * \brief helper macro to declare type information in a final class. - * \param TypeName The name of the current type. - * \param ParentType The name of the ParentType - */ -#define MXNET_DECLARE_FINAL_OBJECT_INFO(TypeName, ParentType) \ - static const constexpr bool _type_final = true; \ - static const constexpr int _type_child_slots = 0; \ - MXNET_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType) \ - + * \param TypeName The name of the current type. + * \param ParentType The name of the ParentType + */ +#define MXNET_DECLARE_FINAL_OBJECT_INFO(TypeName, ParentType) \ + static const constexpr bool _type_final = true; \ + static const constexpr int _type_child_slots = 0; \ + MXNET_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType) /*! * \brief Helper macro to register the object type to runtime. @@ -680,45 +676,49 @@ struct ObjectEqual { * * Use this macro in the cc file for each terminal class. 
*/ -#define MXNET_REGISTER_OBJECT_TYPE(TypeName) \ - static DMLC_ATTRIBUTE_UNUSED uint32_t __make_Object_tidx ## _ ## TypeName ## __ = \ +#define MXNET_REGISTER_OBJECT_TYPE(TypeName) \ + static DMLC_ATTRIBUTE_UNUSED uint32_t __make_Object_tidx##_##TypeName##__ = \ TypeName::_GetOrAllocRuntimeTypeIndex() #define MXNET_DEFINE_DEFAULT_COPY_MOVE_AND_ASSIGN(TypeName) \ TypeName(const TypeName& other) = default; \ - TypeName(TypeName&& other) = default; \ + TypeName(TypeName&& other) = default; \ TypeName& operator=(const TypeName& other) = default; \ TypeName& operator=(TypeName&& other) = default; -#define MXNET_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName) \ - TypeName() {} \ - explicit TypeName( \ - ::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) \ - : ParentType(n) {} \ - const ObjectName* operator->() const { \ - return static_cast(data_.get()); \ - } \ - operator bool() const { return data_ != nullptr; } \ +#define MXNET_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName) \ + TypeName() {} \ + explicit TypeName(::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) : ParentType(n) {} \ + const ObjectName* operator->() const { \ + return static_cast(data_.get()); \ + } \ + operator bool() const { \ + return data_ != nullptr; \ + } \ using ContainerType = ObjectName; -#define MXNET_DEFINE_OBJECT_REF_METHODS_MUT(TypeName, ParentType, ObjectName) \ - TypeName() {} \ - explicit TypeName( \ - ::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) \ - : ParentType(n) {} \ - ObjectName* operator->() { \ - return static_cast(data_.get()); \ - } \ - operator bool() const { return data_ != nullptr; } \ +#define MXNET_DEFINE_OBJECT_REF_METHODS_MUT(TypeName, ParentType, ObjectName) \ + TypeName() {} \ + explicit TypeName(::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) : ParentType(n) {} \ + ObjectName* operator->() { \ + return static_cast(data_.get()); \ + } \ + operator bool() const { \ + return data_ != nullptr; \ + } \ using ContainerType = ObjectName; -#define MXNET_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName) \ - explicit TypeName(::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) : ParentType(n) {} \ - MXNET_DEFINE_DEFAULT_COPY_MOVE_AND_ASSIGN(TypeName); \ - const ObjectName* operator->() const { return static_cast(data_.get()); } \ - const ObjectName* get() const { return operator->(); } \ - static constexpr bool _type_is_nullable = false; \ - using ContainerType = ObjectName; +#define MXNET_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName) \ + explicit TypeName(::mxnet::runtime::ObjectPtr<::mxnet::runtime::Object> n) : ParentType(n) {} \ + MXNET_DEFINE_DEFAULT_COPY_MOVE_AND_ASSIGN(TypeName); \ + const ObjectName* operator->() const { \ + return static_cast(data_.get()); \ + } \ + const ObjectName* get() const { \ + return operator->(); \ + } \ + static constexpr bool _type_is_nullable = false; \ + using ContainerType = ObjectName; // Implementations details below // Object reference counting. @@ -761,14 +761,15 @@ inline int Object::use_count() const { #endif // MXNET_OBJECT_ATOMIC_REF_COUNTER -template +template inline bool Object::IsInstance() const { const Object* self = this; // NOTE: the following code can be optimized by // compiler dead-code elimination for already known constants. if (self != nullptr) { // Everything is a subclass of object. 
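    // (so a TargetType of Object itself short-circuits to true immediately)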
- if (std::is_same::value) return true; + if (std::is_same::value) + return true; if (TargetType::_type_final) { // if the target type is a final type // then we only need to check the equivalence. @@ -780,13 +781,17 @@ inline bool Object::IsInstance() const { // The condition will be optimized by constant-folding. if (TargetType::_type_child_slots != 0) { uint32_t end = begin + TargetType::_type_child_slots; - if (self->type_index_ >= begin && self->type_index_ < end) return true; + if (self->type_index_ >= begin && self->type_index_ < end) + return true; } else { - if (self->type_index_ == begin) return true; + if (self->type_index_ == begin) + return true; } - if (!TargetType::_type_child_slots_can_overflow) return false; + if (!TargetType::_type_child_slots_can_overflow) + return false; // Invariance: parent index is always smaller than the child. - if (self->type_index_ < TargetType::RuntimeTypeIndex()) return false; + if (self->type_index_ < TargetType::RuntimeTypeIndex()) + return false; // The rare slower-path, check type hierachy. return self->DerivedFrom(TargetType::RuntimeTypeIndex()); } @@ -795,11 +800,9 @@ inline bool Object::IsInstance() const { } } - template inline const ObjectType* ObjectRef::as() const { - if (data_ != nullptr && - data_->IsInstance()) { + if (data_ != nullptr && data_->IsInstance()) { return static_cast(data_.get()); } else { return nullptr; @@ -827,8 +830,8 @@ template inline SubRef Downcast(BaseRef ref) { if (ref.defined()) { CHECK(ref->template IsInstance()) - << "Downcast from " << ref->GetTypeKey() << " to " - << SubRef::ContainerType::_type_key << " failed."; + << "Downcast from " << ref->GetTypeKey() << " to " << SubRef::ContainerType::_type_key + << " failed."; } else { CHECK(SubRef::_type_is_nullable) << "Downcast from nullptr to not nullable reference of " << SubRef::ContainerType::_type_key; @@ -838,7 +841,7 @@ inline SubRef Downcast(BaseRef ref) { } // namespace runtime -template +template using NodePtr = runtime::ObjectPtr; } // namespace mxnet diff --git a/include/mxnet/runtime/packed_func.h b/include/mxnet/runtime/packed_func.h index 40ad7bb31ba6..1b5035afd690 100644 --- a/include/mxnet/runtime/packed_func.h +++ b/include/mxnet/runtime/packed_func.h @@ -97,7 +97,7 @@ class PackedFunc { * } * \endcode */ - using FType = std::function; + using FType = std::function; /*! \brief default constructor */ PackedFunc() {} /*! \brief constructor from null */ @@ -121,8 +121,8 @@ class PackedFunc { * } * \endcode */ - template - inline MXNetRetValue operator()(Args&& ...args) const; + template + inline MXNetRetValue operator()(Args&&... args) const; /*! * \brief Call the function in packed format. * \param args The arguments @@ -148,7 +148,7 @@ class PackedFunc { /*! * \brief Please refer to \ref TypedPackedFuncAnchor "TypedPackedFunc" */ -template +template class TypedPackedFunc; /*! @@ -183,7 +183,7 @@ class TypedPackedFunc; * \tparam R The return value of the function. * \tparam Args The argument signature of the function. */ -template +template class TypedPackedFunc { public: /*! \brief short hand for this function type */ @@ -235,11 +235,10 @@ class TypedPackedFunc { * \param typed_lambda typed lambda function. * \tparam FLambda the type of the lambda function. */ - template - >::value>::type> + template >::value>::type> TypedPackedFunc(const FLambda& typed_lambda) { // NOLINT(*) this->AssignTypedLambda(typed_lambda); } @@ -259,11 +258,10 @@ class TypedPackedFunc { * \tparam FLambda the type of the lambda function. * \returns reference to self. 
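   * (Like the converting constructor above, this re-wraps the typed lambda as
   * a PackedFunc through AssignTypedLambda.)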
*/ - template - >::value>::type> + template >::value>::type> TSelf& operator=(FLambda typed_lambda) { // NOLINT(*) this->AssignTypedLambda(typed_lambda); return *this; @@ -282,7 +280,7 @@ class TypedPackedFunc { * \param args The arguments * \returns The return value. */ - inline R operator()(Args ...args) const; + inline R operator()(Args... args) const; /*! * \brief convert to PackedFunc * \return the internal PackedFunc @@ -316,7 +314,7 @@ class TypedPackedFunc { * \tparam FLambda The lambda function type. * \note We capture the lambda when possible for maximum efficiency. */ - template + template inline void AssignTypedLambda(FLambda flambda); }; @@ -332,12 +330,8 @@ class MXNetArgs { * \param type_codes The argument type codes * \param num_args number of arguments. */ - MXNetArgs(const MXNetValue* values, - const int* type_codes, - int num_args) - : values(values), - type_codes(type_codes), - num_args(num_args) { } + MXNetArgs(const MXNetValue* values, const int* type_codes, int num_args) + : values(values), type_codes(type_codes), num_args(num_args) {} /*! \return size of the arguments */ inline int size() const; /*! @@ -363,9 +357,8 @@ inline const char* TypeCode2Str(int type_code); // inline TVMType String2TVMType(std::string s); // macro to check type code. -#define MXNET_CHECK_TYPE_CODE(CODE, T) \ - CHECK_EQ(CODE, T) << " expected " \ - << TypeCode2Str(T) << " but get " << TypeCode2Str(CODE) \ +#define MXNET_CHECK_TYPE_CODE(CODE, T) \ + CHECK_EQ(CODE, T) << " expected " << TypeCode2Str(T) << " but get " << TypeCode2Str(CODE) /*! * \brief Type traits to mark if a class is tvm extension type. @@ -378,7 +371,7 @@ inline const char* TypeCode2Str(int type_code); * * \tparam T the typename */ -template +template struct extension_type_info { static const int code = 0; }; @@ -391,7 +384,8 @@ template struct ObjectTypeChecker { static bool Check(const Object* ptr) { using ContainerType = typename T::ContainerType; - if (ptr == nullptr) return T::_type_is_nullable; + if (ptr == nullptr) + return T::_type_is_nullable; return ptr->IsInstance(); } static std::string TypeName() { @@ -426,8 +420,7 @@ class MXNetPODValue_ { } operator int() const { MXNET_CHECK_TYPE_CODE(type_code_, kDLInt); - CHECK_LE(value_.v_int64, - std::numeric_limits::max()); + CHECK_LE(value_.v_int64, std::numeric_limits::max()); return static_cast(value_.v_int64); } operator bool() const { @@ -435,7 +428,8 @@ class MXNetPODValue_ { return value_.v_int64 != 0; } operator void*() const { - if (type_code_ == kNull) return nullptr; + if (type_code_ == kNull) + return nullptr; MXNET_CHECK_TYPE_CODE(type_code_, kHandle); return value_.v_handle; } @@ -444,12 +438,10 @@ class MXNetPODValue_ { return ObjectRef(ObjectPtr(nullptr)); } MXNET_CHECK_TYPE_CODE(type_code_, kObjectHandle); - return ObjectRef( - ObjectPtr(static_cast(value_.v_handle))); + return ObjectRef(ObjectPtr(static_cast(value_.v_handle))); } - template::value>::type> + template ::value>::type> inline bool IsObjectRef() const; template inline TObjectRef AsObjectRef() const; @@ -462,7 +454,7 @@ class MXNetPODValue_ { * \tparam T the data type. * \return The pointer type. 
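   * (It reinterprets the stored value_.v_handle, so it is only meaningful when
   * the current type code designates a pointer-backed payload such as kStr.)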
*/ - template + template T* ptr() const { return static_cast(value_.v_handle); } @@ -471,8 +463,7 @@ class MXNetPODValue_ { friend class MXNetArgsSetter; friend class MXNetRetValue; MXNetPODValue_() : type_code_(kNull) {} - MXNetPODValue_(MXNetValue value, int type_code) - : value_(value), type_code_(type_code) {} + MXNetPODValue_(MXNetValue value, int type_code) : value_(value), type_code_(type_code) {} /*! \brief The value */ MXNetValue value_; @@ -495,9 +486,7 @@ class MXNetArgValue : public MXNetPODValue_ { * \param value of the function * \param type_code The type code. */ - MXNetArgValue(MXNetValue value, int type_code) - : MXNetPODValue_(value, type_code) { - } + MXNetArgValue(MXNetValue value, int type_code) : MXNetPODValue_(value, type_code) {} // reuse converter from parent using MXNetPODValue_::operator double; using MXNetPODValue_::operator int64_t; @@ -506,8 +495,8 @@ class MXNetArgValue : public MXNetPODValue_ { using MXNetPODValue_::operator bool; using MXNetPODValue_::operator void*; using MXNetPODValue_::operator ObjectRef; - using MXNetPODValue_::IsObjectRef; using MXNetPODValue_::AsObjectRef; + using MXNetPODValue_::IsObjectRef; // conversion operator. operator std::string() const { @@ -526,7 +515,9 @@ class MXNetArgValue : public MXNetPODValue_ { // None type if (type_code_ == kNull) { DLDataType t; - t.code = kHandle; t.bits = 0; t.lanes = 0; + t.code = kHandle; + t.bits = 0; + t.lanes = 0; return t; } MXNET_CHECK_TYPE_CODE(type_code_, kMXNetType); @@ -542,16 +533,14 @@ class MXNetArgValue : public MXNetPODValue_ { MXNET_CHECK_TYPE_CODE(type_code_, kNDArrayHandle); return reinterpret_cast<::mxnet::NDArray*>(value_.v_handle); } - template + template operator TypedPackedFunc() const { return TypedPackedFunc(operator PackedFunc()); } const MXNetValue& value() const { return value_; } - template::value>::type> + template ::value>::type> inline operator T() const; }; @@ -571,10 +560,9 @@ class MXNetRetValue : public MXNetPODValue_ { * \brief move constructor from anoter return value. * \param other The other return value. */ - MXNetRetValue(MXNetRetValue&& other) - : MXNetPODValue_(other.value_, other.type_code_) { + MXNetRetValue(MXNetRetValue&& other) : MXNetPODValue_(other.value_, other.type_code_) { other.value_.v_handle = nullptr; - other.type_code_ = kNull; + other.type_code_ = kNull; } /*! 
\brief destructor */ ~MXNetRetValue() { @@ -588,8 +576,8 @@ class MXNetRetValue : public MXNetPODValue_ { using MXNetPODValue_::operator bool; using MXNetPODValue_::operator void*; using MXNetPODValue_::operator ObjectRef; - using MXNetPODValue_::IsObjectRef; using MXNetPODValue_::AsObjectRef; + using MXNetPODValue_::IsObjectRef; MXNetRetValue(const MXNetRetValue& other) : MXNetPODValue_() { this->Assign(other); @@ -612,15 +600,15 @@ class MXNetRetValue : public MXNetPODValue_ { operator MXNetDataType() const { return MXNetDataType(operator DLDataType()); } - template + template operator TypedPackedFunc() const { return TypedPackedFunc(operator PackedFunc()); } // Assign operators MXNetRetValue& operator=(MXNetRetValue&& other) { this->Clear(); - value_ = other.value_; - type_code_ = other.type_code_; + value_ = other.value_; + type_code_ = other.type_code_; other.type_code_ = kNull; return *this; } @@ -676,12 +664,12 @@ class MXNetRetValue : public MXNetPODValue_ { } return operator=(std::move(other.data_)); } - template + template MXNetRetValue& operator=(ObjectPtr other) { SwitchToObject(kObjectHandle, std::move(other)); return *this; } - template + template MXNetRetValue& operator=(const TypedPackedFunc& f) { return operator=(f.packed()); } @@ -700,7 +688,7 @@ class MXNetRetValue : public MXNetPODValue_ { } MXNetRetValue& operator=(NDArrayHandle value) { this->SwitchToPOD(kNDArrayHandle); - NDArray* arr = new NDArray(value->value); + NDArray* arr = new NDArray(value->value); value_.v_handle = reinterpret_cast(arr); return *this; } @@ -709,12 +697,9 @@ class MXNetRetValue : public MXNetPODValue_ { value_.v_int64 = value.offset(); return *this; } - template::code != 0>::type> + template ::code != 0>::type> MXNetRetValue& operator=(const T& other) { - this->SwitchToClass( - extension_type_info::code, other); + this->SwitchToClass(extension_type_info::code, other); return *this; } /*! @@ -726,28 +711,25 @@ class MXNetRetValue : public MXNetPODValue_ { * \param ret_value The return value. * \param ret_type_code The return type code. */ - void MoveToCHost(MXNetValue* ret_value, - int* ret_type_code) { + void MoveToCHost(MXNetValue* ret_value, int* ret_type_code) { // cannot move str; need specially handle. CHECK(type_code_ != kStr && type_code_ != kBytes); - *ret_value = value_; + *ret_value = value_; *ret_type_code = type_code_; - type_code_ = kNull; + type_code_ = kNull; } /*! 
\return The value field, if the data is POD */ const MXNetValue& value() const { - CHECK(type_code_ != kObjectHandle && - type_code_ != kStr) << "MXNetRetValue.value can only be used for POD data"; + CHECK(type_code_ != kObjectHandle && type_code_ != kStr) + << "MXNetRetValue.value can only be used for POD data"; return value_; } // ObjectRef related extenstions: in tvm/packed_func_ext.h - template::value>::type> + template ::value>::type> inline operator T() const; private: - template + template void Assign(const T& other) { switch (other.type_code()) { case kStr: { @@ -780,11 +762,11 @@ class MXNetRetValue : public MXNetPODValue_ { type_code_ = type_code; } } - template + template void SwitchToClass(int type_code, T v) { if (type_code_ != type_code) { this->Clear(); - type_code_ = type_code; + type_code_ = type_code; value_.v_handle = new T(v); } else { *static_cast(value_.v_handle) = v; @@ -796,15 +778,18 @@ class MXNetRetValue : public MXNetPODValue_ { type_code_ = type_code; // move the handle out value_.v_handle = other.data_; - other.data_ = nullptr; + other.data_ = nullptr; } else { SwitchToPOD(kNull); } } void Clear() { - if (type_code_ == kNull) return; + if (type_code_ == kNull) + return; switch (type_code_) { - case kStr: delete ptr(); break; + case kStr: + delete ptr(); + break; case kObjectHandle: { static_cast(value_.v_handle)->DecRef(); break; @@ -821,24 +806,30 @@ inline DLDataType String2DLDataType(std::string s) { DLDataType t; // handle None type if (s.length() == 0) { - t.bits = 0; t.lanes = 0; t.code = kHandle; + t.bits = 0; + t.lanes = 0; + t.code = kHandle; return t; } - t.bits = 32; t.lanes = 1; + t.bits = 32; + t.lanes = 1; const char* scan = nullptr; if (s.substr(0, 3) == "int") { - t.code = kDLInt; scan = s.c_str() + 3; + t.code = kDLInt; + scan = s.c_str() + 3; } else if (s.substr(0, 4) == "uint") { - t.code = kDLUInt; scan = s.c_str() + 4; + t.code = kDLUInt; + scan = s.c_str() + 4; } else if (s.substr(0, 5) == "float") { - t.code = kDLFloat; scan = s.c_str() + 5; + t.code = kDLFloat; + scan = s.c_str() + 5; } else if (s.substr(0, 6) == "handle") { t.code = kHandle; t.bits = 64; // handle uses 64 bit by default. 
- scan = s.c_str() + 6; + scan = s.c_str() + 6; } else if (s == "bool") { - t.code = kDLUInt; - t.bits = 1; + t.code = kDLUInt; + t.bits = 1; t.lanes = 1; return t; } else if (s.substr(0, 6) == "custom") { @@ -850,7 +841,8 @@ inline DLDataType String2DLDataType(std::string s) { } char* xdelim; // emulate sscanf("%ux%u", bits, lanes) uint8_t bits = static_cast(strtoul(scan, &xdelim, 10)); - if (bits != 0) t.bits = bits; + if (bits != 0) + t.bits = bits; char* endpt = xdelim; if (*xdelim == 'x') { t.lanes = static_cast(strtoul(xdelim + 1, &endpt, 10)); @@ -862,17 +854,27 @@ inline DLDataType String2DLDataType(std::string s) { // implementation details inline const char* TypeCode2Str(int type_code) { switch (type_code) { - case kDLInt: return "int"; - case kDLUInt: return "uint"; - case kDLFloat: return "float"; - case kStr: return "str"; - case kBytes: return "bytes"; - case kHandle: return "handle"; - case kNull: return "NULL"; - case kObjectHandle: return "ObjectCell"; - case kNDArrayHandle: return "NDArray"; - default: LOG(FATAL) << "unknown type_code=" - << static_cast(type_code); return ""; + case kDLInt: + return "int"; + case kDLUInt: + return "uint"; + case kDLFloat: + return "float"; + case kStr: + return "str"; + case kBytes: + return "bytes"; + case kHandle: + return "handle"; + case kNull: + return "NULL"; + case kObjectHandle: + return "ObjectCell"; + case kNDArrayHandle: + return "NDArray"; + default: + LOG(FATAL) << "unknown type_code=" << static_cast(type_code); + return ""; } } @@ -940,7 +942,8 @@ inline int String2MXNetType(const std::string& s) { inline std::ostream& operator<<(std::ostream& os, DLDataType t) { // NOLINT(*) if (t.bits == 1 && t.lanes == 1 && t.code == kDLUInt) { - os << "bool"; return os; + os << "bool"; + return os; } if (t.code < kCustomBegin) { os << TypeCode2Str(t.code); @@ -948,7 +951,8 @@ inline std::ostream& operator<<(std::ostream& os, DLDataType t) { // NOLINT(*) LOG(FATAL) << "custom MXNetDataType is not supported"; // os << "custom[" << GetCustomTypeName(t.code) << "]"; } - if (t.code == kHandle) return os; + if (t.code == kHandle) + return os; os << static_cast(t.bits); if (t.lanes != 1) { os << 'x' << static_cast(t.lanes); @@ -956,15 +960,13 @@ inline std::ostream& operator<<(std::ostream& os, DLDataType t) { // NOLINT(*) return os; } -inline std::ostream& operator<<(std::ostream& os, const MXNetDataType& dtype) { // NOLINT(*) +inline std::ostream& operator<<(std::ostream& os, const MXNetDataType& dtype) { // NOLINT(*) return os << dtype.operator DLDataType(); } inline MXNetArgValue MXNetArgs::operator[](int i) const { - CHECK_LT(i, num_args) - << "not enough argument passed, " - << num_args << " passed" - << " but request arg[" << i << "]."; + CHECK_LT(i, num_args) << "not enough argument passed, " << num_args << " passed" + << " but request arg[" << i << "]."; return MXNetArgValue(values[i], type_codes[i]); } @@ -983,93 +985,87 @@ inline PackedFunc::FType PackedFunc::body() const { // internal namespace namespace detail { -template +template struct for_each_dispatcher { - template + template static void run(const F& f, T&& value, Args&&... args) { // NOLINT(*) f(I, std::forward(value)); - for_each_dispatcher - ::run(f, std::forward(args)...); + for_each_dispatcher::run(f, std::forward(args)...); } }; -template -struct for_each_dispatcher { +template +struct for_each_dispatcher { static void run(const F& f) {} // NOLINT(*) }; -template +template inline void for_each(const F& f, Args&&... 
args) { // NOLINT(*) - for_each_dispatcher - ::run(f, std::forward(args)...); + for_each_dispatcher::run(f, std::forward(args)...); } } // namespace detail /* \brief argument settter to PackedFunc */ class MXNetArgsSetter { public: - MXNetArgsSetter(MXNetValue* values, int* type_codes) - : values_(values), type_codes_(type_codes) {} + MXNetArgsSetter(MXNetValue* values, int* type_codes) : values_(values), type_codes_(type_codes) {} // setters for POD types - template::value>::type> + template ::value>::type> void operator()(size_t i, T value) const { values_[i].v_int64 = static_cast(value); - type_codes_[i] = kDLInt; + type_codes_[i] = kDLInt; } void operator()(size_t i, uint64_t value) const { values_[i].v_int64 = static_cast(value); - CHECK_LE(value, - static_cast(std::numeric_limits::max())); + CHECK_LE(value, static_cast(std::numeric_limits::max())); type_codes_[i] = kDLInt; } void operator()(size_t i, double value) const { values_[i].v_float64 = value; - type_codes_[i] = kDLFloat; + type_codes_[i] = kDLFloat; } void operator()(size_t i, std::nullptr_t value) const { values_[i].v_handle = value; - type_codes_[i] = kNull; + type_codes_[i] = kNull; } void operator()(size_t i, const MXNetArgValue& value) const { - values_[i] = value.value_; + values_[i] = value.value_; type_codes_[i] = value.type_code_; } void operator()(size_t i, void* value) const { values_[i].v_handle = value; - type_codes_[i] = kHandle; + type_codes_[i] = kHandle; } void operator()(size_t i, const char* value) const { values_[i].v_str = value; - type_codes_[i] = kStr; + type_codes_[i] = kStr; } // setters for container type // They must be reference(instead of const ref) // to make sure they are alive in the tuple(instead of getting converted) void operator()(size_t i, const std::string& value) const { // NOLINT(*) values_[i].v_str = value.c_str(); - type_codes_[i] = kStr; + type_codes_[i] = kStr; } void operator()(size_t i, DLDataType value) const { values_[i].v_type = value; - type_codes_[i] = kMXNetType; + type_codes_[i] = kMXNetType; } void operator()(size_t i, MXNetDataType dtype) const { operator()(i, dtype.operator DLDataType()); } void operator()(size_t i, const MXNetByteArray& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); - type_codes_[i] = kBytes; + type_codes_[i] = kBytes; } - template + template void operator()(size_t i, const TypedPackedFunc& value) const { // NOLINT(*) operator()(i, value.packed()); } void operator()(size_t i, const ObjectRef& value) const { // NOLINT(*) if (value.defined()) { values_[i].v_handle = value.data_.data_; - type_codes_[i] = kObjectHandle; + type_codes_[i] = kObjectHandle; } else { type_codes_[i] = kNull; } @@ -1077,10 +1073,10 @@ class MXNetArgsSetter { void operator()(size_t i, const MXNetRetValue& value) const { // NOLINT(*) if (value.type_code() == kStr) { values_[i].v_str = value.ptr()->c_str(); - type_codes_[i] = kStr; + type_codes_[i] = kStr; } else { CHECK_NE(value.type_code(), kBytes) << "not handled."; - values_[i] = value.value_; + values_[i] = value.value_; type_codes_[i] = value.type_code(); } } @@ -1092,37 +1088,34 @@ class MXNetArgsSetter { int* type_codes_; }; -template -inline MXNetRetValue PackedFunc::operator()(Args&& ...args) const { - const int kNumArgs = sizeof...(Args); +template +inline MXNetRetValue PackedFunc::operator()(Args&&... args) const { + const int kNumArgs = sizeof...(Args); const int kArraySize = kNumArgs > 0 ? 
kNumArgs : 1; MXNetValue values[kArraySize]; int type_codes[kArraySize]; - detail::for_each(MXNetArgsSetter(values, type_codes), - std::forward(args)...); + detail::for_each(MXNetArgsSetter(values, type_codes), std::forward(args)...); MXNetRetValue rv; body_(MXNetArgs(values, type_codes, kNumArgs), &rv); return rv; } namespace detail { -template +template struct unpack_call_dispatcher { - template + template static void run(const F& f, const MXNetArgs& args_pack, MXNetRetValue* rv, Args&&... unpacked_args) { - unpack_call_dispatcher - ::run(f, args_pack, rv, - std::forward(unpacked_args)..., - args_pack[index]); + unpack_call_dispatcher::run( + f, args_pack, rv, std::forward(unpacked_args)..., args_pack[index]); } }; -template +template struct unpack_call_dispatcher { - template + template static void run(const F& f, const MXNetArgs& args_pack, MXNetRetValue* rv, @@ -1131,9 +1124,9 @@ struct unpack_call_dispatcher { } }; -template +template struct unpack_call_dispatcher { - template + template static void run(const F& f, const MXNetArgs& args_pack, MXNetRetValue* rv, @@ -1142,62 +1135,60 @@ struct unpack_call_dispatcher { } }; -template +template inline void unpack_call(const F& f, const MXNetArgs& args, MXNetRetValue* rv) { unpack_call_dispatcher::run(f, args, rv); } -template -inline R call_packed(const PackedFunc& pf, Args&& ...args) { +template +inline R call_packed(const PackedFunc& pf, Args&&... args) { return R(pf(std::forward(args)...)); } -template +template struct typed_packed_call_dispatcher { - template - static inline R run(const PackedFunc& pf, Args&& ...args) { + template + static inline R run(const PackedFunc& pf, Args&&... args) { return pf(std::forward(args)...); } }; -template<> +template <> struct typed_packed_call_dispatcher { - template - static inline void run(const PackedFunc& pf, Args&& ...args) { + template + static inline void run(const PackedFunc& pf, Args&&... args) { pf(std::forward(args)...); } }; } // namespace detail -template -TypedPackedFunc::TypedPackedFunc(PackedFunc packed) - : packed_(packed) {} +template +TypedPackedFunc::TypedPackedFunc(PackedFunc packed) : packed_(packed) {} -template +template TypedPackedFunc::TypedPackedFunc(const MXNetRetValue& value) : packed_(value.operator PackedFunc()) {} -template +template TypedPackedFunc::TypedPackedFunc(const MXNetArgValue& value) : packed_(value.operator PackedFunc()) {} -template -template +template +template inline void TypedPackedFunc::AssignTypedLambda(FType flambda) { packed_ = PackedFunc([flambda](const MXNetArgs& args, MXNetRetValue* rv) { - detail::unpack_call(flambda, args, rv); - }); + detail::unpack_call(flambda, args, rv); + }); } -template +template inline R TypedPackedFunc::operator()(Args... args) const { - return detail::typed_packed_call_dispatcher - ::run(packed_, std::forward(args)...); + return detail::typed_packed_call_dispatcher::run(packed_, std::forward(args)...); } // extension and node type handling namespace detail { -template +template struct MXNetValueCast { static T Apply(const TSrc* self) { static_assert(!is_ext && !is_nd, "The default case accepts only non-extensions"); @@ -1223,13 +1214,17 @@ struct PackedFuncValueConverter { * \param val The argument value. * \return the converted result. */ - static TObjectRef From(const MXNetArgValue& val) { return val.AsObjectRef(); } + static TObjectRef From(const MXNetArgValue& val) { + return val.AsObjectRef(); + } /*! * \brief Convert a TObjectRef from a return value. * \param val The argument value. 
* \return the converted result. */ - static TObjectRef From(const MXNetRetValue& val) { return val.AsObjectRef(); } + static TObjectRef From(const MXNetRetValue& val) { + return val.AsObjectRef(); + } }; template <> @@ -1283,8 +1278,8 @@ inline MXNetArgValue::operator T() const { template inline bool MXNetPODValue_::IsObjectRef() const { using ContainerType = typename TObjectRef::ContainerType; - return type_code_ == kObjectHandle && - ObjectTypeChecker::Check(static_cast(value_.v_handle)); + return type_code_ == kObjectHandle && + ObjectTypeChecker::Check(static_cast(value_.v_handle)); } inline bool String::CanConvertFrom(const MXNetArgValue& val) { diff --git a/include/mxnet/runtime/py_arg.h b/include/mxnet/runtime/py_arg.h index 81d1b30a573e..fa8b1adb9ac0 100644 --- a/include/mxnet/runtime/py_arg.h +++ b/include/mxnet/runtime/py_arg.h @@ -28,10 +28,11 @@ namespace runtime { class PythonArg { public: - explicit PythonArg(int offset): offset_(offset) {} + explicit PythonArg(int offset) : offset_(offset) {} int offset() const { return offset_; } + private: int offset_; }; diff --git a/include/mxnet/runtime/registry.h b/include/mxnet/runtime/registry.h index 70782b47254d..a59dc24ba208 100644 --- a/include/mxnet/runtime/registry.h +++ b/include/mxnet/runtime/registry.h @@ -80,7 +80,7 @@ class Registry { * \tparam FType the signature of the function. * \tparam FLambda The type of f. */ - template + template Registry& set_body_typed(FLambda f) { return set_body(TypedPackedFunc(f).packed()); } @@ -89,7 +89,8 @@ class Registry { * \brief set the body of the function to the given function pointer. * Note that this doesn't work with lambdas, you need to * explicitly give a type for those. - * Note that this will ignore default arg values and always require all arguments to be provided. + * Note that this will ignore default arg values and always require all arguments to be + * provided. * * \code * @@ -106,14 +107,15 @@ class Registry { * \tparam R the return type of the function (inferred). * \tparam Args the argument types of the function (inferred). */ - template + template Registry& set_body_typed(R (*f)(Args...)) { return set_body(TypedPackedFunc(f)); } /*! * \brief set the body of the function to be the passed method pointer. - * Note that this will ignore default arg values and always require all arguments to be provided. + * Note that this will ignore default arg values and always require all arguments to be + * provided. * * \code * @@ -131,7 +133,7 @@ class Registry { * \tparam R the return type of the function (inferred). * \tparam Args the argument types of the function (inferred). */ - template + template Registry& set_body_method(R (T::*f)(Args...)) { return set_body_typed([f](T target, Args... params) -> R { // call method pointer @@ -141,7 +143,8 @@ class Registry { /*! * \brief set the body of the function to be the passed method pointer. - * Note that this will ignore default arg values and always require all arguments to be provided. + * Note that this will ignore default arg values and always require all arguments to be + * provided. * * \code * @@ -159,7 +162,7 @@ class Registry { * \tparam R the return type of the function (inferred). * \tparam Args the argument types of the function (inferred). */ - template + template Registry& set_body_method(R (T::*f)(Args...) const) { return set_body_typed([f](const T target, Args... params) -> R { // call method pointer @@ -170,7 +173,8 @@ class Registry { /*! * \brief set the body of the function to be the passed method pointer. 
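 * For instance (an illustrative sketch; `MyNode`/`MyRef` are hypothetical
 * Object/ObjectRef types, not part of this patch):
 *
 * \code
 *
 * // expose MyNode::Value() through the MyRef handle type
 * MXNET_REGISTER_GLOBAL("MyNode_Value")
 * .set_body_method<MyRef>(&MyNode::Value);
 *
 * \endcode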
* Used when calling a method on a Node subclass through a ObjectRef subclass. - * Note that this will ignore default arg values and always require all arguments to be provided. + * Note that this will ignore default arg values and always require all arguments to be + * provided. * * \code * @@ -197,8 +201,11 @@ class Registry { * \tparam R the return type of the function (inferred). * \tparam Args the argument types of the function (inferred). */ - template::value>::type> + template ::value>::type> Registry& set_body_method(R (TNode::*f)(Args...)) { return set_body_typed([f](TObjectRef ref, Args... params) { TNode* target = ref.operator->(); @@ -210,7 +217,8 @@ class Registry { /*! * \brief set the body of the function to be the passed method pointer. * Used when calling a method on a Node subclass through a ObjectRef subclass. - * Note that this will ignore default arg values and always require all arguments to be provided. + * Note that this will ignore default arg values and always require all arguments to be + * provided. * * \code * @@ -237,8 +245,11 @@ class Registry { * \tparam R the return type of the function (inferred). * \tparam Args the argument types of the function (inferred). */ - template::value>::type> + template ::value>::type> Registry& set_body_method(R (TNode::*f)(Args...) const) { return set_body_typed([f](TObjectRef ref, Args... params) { const TNode* target = ref.operator->(); @@ -292,10 +303,10 @@ class Registry { #endif #define MXNET_STR_CONCAT_(__x, __y) __x##__y -#define MXNET_STR_CONCAT(__x, __y) MXNET_STR_CONCAT_(__x, __y) +#define MXNET_STR_CONCAT(__x, __y) MXNET_STR_CONCAT_(__x, __y) -#define MXNET_FUNC_REG_VAR_DEF \ - static MXNET_ATTRIBUTE_UNUSED ::mxnet::runtime::Registry& __mk_ ## MXNET +#define MXNET_FUNC_REG_VAR_DEF \ + static MXNET_ATTRIBUTE_UNUSED ::mxnet::runtime::Registry& __mk_##MXNET /*! * \brief Register a function globally. @@ -305,8 +316,8 @@ class Registry { * }); * \endcode */ -#define MXNET_REGISTER_GLOBAL(OpName) \ - MXNET_STR_CONCAT(MXNET_FUNC_REG_VAR_DEF, __COUNTER__) = \ +#define MXNET_REGISTER_GLOBAL(OpName) \ + MXNET_STR_CONCAT(MXNET_FUNC_REG_VAR_DEF, __COUNTER__) = \ ::mxnet::runtime::Registry::Register(OpName) } // namespace runtime diff --git a/include/mxnet/storage.h b/include/mxnet/storage.h index 1cb35270f026..0d4964bfded9 100644 --- a/include/mxnet/storage.h +++ b/include/mxnet/storage.h @@ -31,8 +31,8 @@ namespace mxnet { -#define MXNET_STORAGE_DEFAULT_PROFILER_SCOPE_CSTR ":" -#define MXNET_STORAGE_DEFAULT_NAME_CSTR "unknown" +#define MXNET_STORAGE_DEFAULT_PROFILER_SCOPE_CSTR ":" +#define MXNET_STORAGE_DEFAULT_NAME_CSTR "unknown" /*! * \brief Storage manager across multiple devices. @@ -70,7 +70,7 @@ class Storage { * \brief Id for IPC shared memory */ int shared_pid{-1}; - int shared_id {-1}; + int shared_id{-1}; /*! * \brief Attributes for tracking storage allocations. */ @@ -92,7 +92,7 @@ class Storage { Handle Alloc(size_t size, Context ctx, bool failsafe = false) { Handle hd; hd.size = size; - hd.ctx = ctx; + hd.ctx = ctx; this->Alloc(&hd, failsafe); return hd; } @@ -122,12 +122,12 @@ class Storage { */ virtual void DirectFree(Handle handle) = 0; /*! - * \brief Release all memory from device if using a pooled storage manager - * - * This release all memory from pool storage managers such as - * GPUPooledStorageManager and GPUPooledRoundedStorageManager. - * For non-pool memory managers this has no effect. 
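 * A hedged usage sketch (illustrative only, not part of this patch):
 *
 * \code
 *
 * Storage* storage = Storage::Get();      // process-wide storage manager
 * storage->ReleaseAll(Context::GPU(0));   // drop pooled buffers on GPU 0
 *
 * \endcode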
- */ + * \brief Release all memory from device if using a pooled storage manager + * + * This release all memory from pool storage managers such as + * GPUPooledStorageManager and GPUPooledRoundedStorageManager. + * For non-pool memory managers this has no effect. + */ virtual void ReleaseAll(Context ctx) = 0; /*! * \brief Destructor. diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h old mode 100755 new mode 100644 index 8fdc3cd6e2ac..479b3cf3a260 --- a/include/mxnet/tensor_blob.h +++ b/include/mxnet/tensor_blob.h @@ -64,18 +64,17 @@ class NDArray; */ class TBlob { friend class NDArray; + public: /*! \brief pointer to the data */ - void *dptr_; + void* dptr_; /*! \brief shape of the tensor */ mxnet::TShape shape_; /*! \brief type flag of the tensor blob */ int type_flag_; /*! \brief default constructor, default copy assign will work */ - TBlob(void) - : dptr_(nullptr), - type_flag_(mshadow::DataType::kFlag) { + TBlob(void) : dptr_(nullptr), type_flag_(mshadow::DataType::kFlag) { SetDLTensor(cpu::kDevMask, 0); } /*! @@ -85,10 +84,9 @@ class TBlob { * \param dev_mask the device mask, can be cpu::kDevMask or gpu::kDevMask * \param dev_id the device id */ - template - TBlob(DType *dptr, const mxnet::TShape &shape, int dev_mask, int dev_id = -1) - : dptr_(dptr), shape_(shape), - type_flag_(mshadow::DataType::kFlag) { + template + TBlob(DType* dptr, const mxnet::TShape& shape, int dev_mask, int dev_id = -1) + : dptr_(dptr), shape_(shape), type_flag_(mshadow::DataType::kFlag) { SetDLTensor(dev_mask, dev_id); } /*! @@ -99,7 +97,7 @@ class TBlob { * \param type_flag the type flag. Can be one of enum mshadow::dtype * \param dev_id the device id */ - TBlob(void *dptr, const mxnet::TShape &shape, int dev_mask, int type_flag, int dev_id = -1) + TBlob(void* dptr, const mxnet::TShape& shape, int dev_mask, int type_flag, int dev_id = -1) : dptr_(dptr), shape_(shape), type_flag_(type_flag) { SetDLTensor(dev_mask, dev_id); } @@ -107,7 +105,7 @@ class TBlob { * \brief constructor that construct TBlob from DLTensor * \param DLTensor Object */ - explicit TBlob(const DLTensor &dltensor) + explicit TBlob(const DLTensor& dltensor) : dptr_(dltensor.data), shape_(mxnet::TShape(dltensor.shape, dltensor.shape + dltensor.ndim)), type_flag_(DLDataTypeTransform(dltensor.dtype)), @@ -115,9 +113,9 @@ class TBlob { // compactness check for DLTensor if (dltensor.strides != nullptr) { // check strides - const int &ndim = dltensor.ndim; - const int64_t *shape = dltensor.shape; - const int64_t *strides = dltensor.strides; + const int& ndim = dltensor.ndim; + const int64_t* shape = dltensor.shape; + const int64_t* strides = dltensor.strides; if (ndim >= 1) { bool err = false; if (strides[ndim - 1] != 1) { @@ -143,15 +141,15 @@ class TBlob { * \tparam dim tensor dimension * \tparam DType the type of elements in the tensor */ - template - TBlob(const mshadow::Tensor &src) { // NOLINT(*) + template + TBlob(const mshadow::Tensor& src) { // NOLINT(*) *this = src; } /*! * \brief constructor from TBlob (copy constructor) * \param src source TBlob */ - TBlob(const TBlob &src): dptr_(src.dptr_), shape_(src.shape_), type_flag_(src.type_flag_) { + TBlob(const TBlob& src) : dptr_(src.dptr_), shape_(src.shape_), type_flag_(src.type_flag_) { this->SetDLTensor(src.dev_mask(), src.dev_id()); } /*! 
@@ -162,10 +160,10 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return reference of self */ - template - inline TBlob &operator=(const mshadow::Tensor &src) { - dptr_ = src.dptr_; - shape_ = src.shape_; + template + inline TBlob& operator=(const mshadow::Tensor& src) { + dptr_ = src.dptr_; + shape_ = src.shape_; type_flag_ = mshadow::DataType::kFlag; SetDLTensor(Device::kDevMask, -1); return *this; @@ -175,9 +173,9 @@ class TBlob { * \param src source TBlob * \return reference of self */ - inline TBlob &operator=(const TBlob &src) { - dptr_ = src.dptr_; - shape_ = src.shape_; + inline TBlob& operator=(const TBlob& src) { + dptr_ = src.dptr_; + shape_ = src.shape_; type_flag_ = src.type_flag_; SetDLTensor(src.dev_mask(), src.dev_id()); return *this; @@ -194,8 +192,8 @@ class TBlob { * \return reshaped blob */ inline TBlob reshape(const mxnet::TShape& shape) const { - CHECK_EQ(this->shape_.Size(), shape.Size()) << "Shape size mismatch " - << this->shape_.Size() << " v.s. " << shape.Size(); + CHECK_EQ(this->shape_.Size(), shape.Size()) + << "Shape size mismatch " << this->shape_.Size() << " v.s. " << shape.Size(); TBlob ret(this->dptr_, shape, this->dev_mask(), this->type_flag_, this->dev_id()); return ret; } @@ -206,18 +204,16 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return tensor after flatten */ - template + template inline mshadow::Tensor FlatTo2D( - mshadow::Stream *stream = nullptr) const { + mshadow::Stream* stream = nullptr) const { CHECK(Device::kDevMask == this->dev_mask()) - << "TBlob.get: device type do not match specified type"; + << "TBlob.get: device type do not match specified type"; CHECK(mshadow::DataType::kFlag == type_flag_) - << "TBlob.get_with_shape: data type do not match specified type." - << "Expected: " << mshadow::dtype_string(type_flag_) - << " v.s. given " << mshadow::dtype_string(mshadow::DataType::kFlag); - return mshadow::Tensor(static_cast(dptr_), - shape_.FlatTo2D(), - stream); + << "TBlob.get_with_shape: data type do not match specified type." + << "Expected: " << mshadow::dtype_string(type_flag_) << " v.s. given " + << mshadow::dtype_string(mshadow::DataType::kFlag); + return mshadow::Tensor(static_cast(dptr_), shape_.FlatTo2D(), stream); } /*! * \brief flatten the tensor to 1 dimension, collapse all the dimensions together. @@ -226,11 +222,10 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return tensor after flatten */ - template + template inline mshadow::Tensor FlatTo1D( - mshadow::Stream *stream = nullptr) const { - return this->get_with_shape( - mshadow::Shape1(shape_.Size()), stream); + mshadow::Stream* stream = nullptr) const { + return this->get_with_shape(mshadow::Shape1(shape_.Size()), stream); } /*! \brief return number of dimension of the tensor inside */ inline int ndim(void) const { @@ -250,12 +245,12 @@ class TBlob { return shape_.Size(); } /*! \brief get pointer in dtype */ - template + template inline DType* dptr() const { CHECK(mshadow::DataType::kFlag == type_flag_) - << "TBlob.get_with_shape: data type do not match specified type." - << "Expected: " << mshadow::dtype_string(type_flag_) - << " v.s. given " << mshadow::dtype_string(mshadow::DataType::kFlag); + << "TBlob.get_with_shape: data type do not match specified type." + << "Expected: " << mshadow::dtype_string(type_flag_) << " v.s. given " + << mshadow::dtype_string(mshadow::DataType::kFlag); return static_cast(dptr_); } /*! 
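 * (Note: the dptr<DType>() accessor above is type-checked at runtime; asking
 * for a DType whose mshadow flag differs from type_flag_ trips the mismatch
 * CHECK instead of silently returning a reinterpreted pointer.)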
\brief device mask of the corresponding device */ @@ -283,12 +278,12 @@ class TBlob { * \tparam dim dimension of the tensor * \tparam DType the type of elements in the tensor */ - template - inline mshadow::Tensor get(mshadow::Stream *stream = nullptr) const { + template + inline mshadow::Tensor get(mshadow::Stream* stream = nullptr) const { CHECK(Device::kDevMask == this->dev_mask()) - << "TBlob.get: device type do not match specified type"; - return mshadow::Tensor(dptr(), - shape_.get(), shape_[shape_.ndim() - 1], stream); + << "TBlob.get: device type do not match specified type"; + return mshadow::Tensor( + dptr(), shape_.get(), shape_[shape_.ndim() - 1], stream); } /*! * \brief fetch a tensor in given shape @@ -300,17 +295,16 @@ class TBlob { * \tparam dim dimension of the tensor * \tparam DType the type of elements in the tensor */ - template + template inline mshadow::Tensor get_with_shape( - const mshadow::Shape &shape, - mshadow::Stream *stream = nullptr) const { + const mshadow::Shape& shape, + mshadow::Stream* stream = nullptr) const { CHECK(Device::kDevMask == this->dev_mask()) - << "TBlob.get: device type do not match specified type"; + << "TBlob.get: device type do not match specified type"; CHECK_EQ(this->CheckContiguous(), true) << "TBlob.get_reshape: must be contiguous"; CHECK_EQ(this->shape_.Size(), static_cast(shape.Size())) - << "TBlob.get_with_shape: new and old shape do not match total elements"; - return mshadow::Tensor(dptr(), shape, - shape[dim - 1], stream); + << "TBlob.get_with_shape: new and old shape do not match total elements"; + return mshadow::Tensor(dptr(), shape, shape[dim - 1], stream); } /*! * \brief flatten the tensor to 3 dimension, @@ -321,11 +315,11 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return tensor after flatten */ - template + template inline mshadow::Tensor FlatTo3D( - int axis, mshadow::Stream *stream = nullptr) const { - return this->get_with_shape( - this->shape_.FlatTo3D(axis), stream); + int axis, + mshadow::Stream* stream = nullptr) const { + return this->get_with_shape(this->shape_.FlatTo3D(axis), stream); } /*! * \brief flatten the tensor to 3 dimension, @@ -337,12 +331,11 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return tensor after flatten */ - template - inline mshadow::Tensor FlatTo3D( - int axis_begin, int axis_end, - mshadow::Stream *stream = nullptr) const { - return this->get_with_shape( - this->shape_.FlatTo3D(axis_begin, axis_end), stream); + template + inline mshadow::Tensor + FlatTo3D(int axis_begin, int axis_end, mshadow::Stream* stream = nullptr) const { + return this->get_with_shape(this->shape_.FlatTo3D(axis_begin, axis_end), + stream); } /*! 
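 * (A usage sketch for the accessors above, assuming a contiguous float32
 * CPU blob; the brace-initialized TShape is illustrative, not part of this
 * patch:
 *
 * \code
 *
 * float buf[6] = {0, 1, 2, 3, 4, 5};
 * TBlob blob(buf, mxnet::TShape({2, 3}), mshadow::cpu::kDevMask);
 * auto t2 = blob.get<mshadow::cpu, 2, float>();  // checked 2x3 view
 * auto t1 = blob.get_with_shape<mshadow::cpu, 1, float>(mshadow::Shape1(6));
 *
 * \endcode
 * )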
* \brief flatten the tensor to specified number of dimensions, @@ -353,9 +346,9 @@ class TBlob { * \tparam DType the type of elements in the tensor * \return tensor after flatten */ - template + template inline mshadow::Tensor FlatToKD( - mshadow::Stream *stream = nullptr) const { + mshadow::Stream* stream = nullptr) const { mshadow::Shape shape; shape[0] = 1; // Pad higher dimensions in case dim > ndim() @@ -376,19 +369,32 @@ class TBlob { private: static DLDataType DTypeTransform(int type_flag) { switch (type_flag) { - case mshadow::kFloat32: return DLDataType{kDLFloat, 32, 1}; - case mshadow::kFloat64: return DLDataType{kDLFloat, 64, 1}; - case mshadow::kFloat16: return DLDataType{kDLFloat, 16, 1}; - case mshadow::kBfloat16: return DLDataType{kDLBfloat, 16, 1}; - case mshadow::kUint8: return DLDataType{kDLUInt, 8, 1}; - case mshadow::kInt32: return DLDataType{kDLInt, 32, 1}; - case mshadow::kInt8: return DLDataType{kDLInt, 8, 1}; - case mshadow::kInt64: return DLDataType{kDLInt, 64, 1}; - case mshadow::kBool: return DLDataType{kDLUInt, 1, 1}; - case mshadow::kInt16: return DLDataType{kDLInt, 16, 1}; - case mshadow::kUint16: return DLDataType{kDLUInt, 16, 1}; - case mshadow::kUint32: return DLDataType{kDLUInt, 32, 1}; - case mshadow::kUint64: return DLDataType{kDLUInt, 64, 1}; + case mshadow::kFloat32: + return DLDataType{kDLFloat, 32, 1}; + case mshadow::kFloat64: + return DLDataType{kDLFloat, 64, 1}; + case mshadow::kFloat16: + return DLDataType{kDLFloat, 16, 1}; + case mshadow::kBfloat16: + return DLDataType{kDLBfloat, 16, 1}; + case mshadow::kUint8: + return DLDataType{kDLUInt, 8, 1}; + case mshadow::kInt32: + return DLDataType{kDLInt, 32, 1}; + case mshadow::kInt8: + return DLDataType{kDLInt, 8, 1}; + case mshadow::kInt64: + return DLDataType{kDLInt, 64, 1}; + case mshadow::kBool: + return DLDataType{kDLUInt, 1, 1}; + case mshadow::kInt16: + return DLDataType{kDLInt, 16, 1}; + case mshadow::kUint16: + return DLDataType{kDLUInt, 16, 1}; + case mshadow::kUint32: + return DLDataType{kDLUInt, 32, 1}; + case mshadow::kUint64: + return DLDataType{kDLUInt, 64, 1}; default: { LOG(FATAL) << "Unknown type_flag=" << type_flag; return DLDataType(); @@ -402,47 +408,59 @@ class TBlob { switch (dldata_type.code) { case kDLFloat: switch (dldata_type.bits) { - case 16: return mshadow::kFloat16; - case 32: return mshadow::kFloat32; - case 64: return mshadow::kFloat64; + case 16: + return mshadow::kFloat16; + case 32: + return mshadow::kFloat32; + case 64: + return mshadow::kFloat64; } break; case kDLBfloat: switch (dldata_type.bits) { - case 16: return mshadow::kBfloat16; + case 16: + return mshadow::kBfloat16; } break; case kDLUInt: switch (dldata_type.bits) { - case 1: return mshadow::kBool; - case 8: return mshadow::kUint8; - case 16: return mshadow::kUint16; - case 32: return mshadow::kUint32; - case 64: return mshadow::kUint64; + case 1: + return mshadow::kBool; + case 8: + return mshadow::kUint8; + case 16: + return mshadow::kUint16; + case 32: + return mshadow::kUint32; + case 64: + return mshadow::kUint64; } break; case kDLInt: switch (dldata_type.bits) { - case 8: return mshadow::kInt8; - case 16: return mshadow::kInt16; - case 32: return mshadow::kInt32; - case 64: return mshadow::kInt64; + case 8: + return mshadow::kInt8; + case 16: + return mshadow::kInt16; + case 32: + return mshadow::kInt32; + case 64: + return mshadow::kInt64; } break; } - LOG(FATAL) << "Unknown DLDataType{" << dldata_type.code - << ", " << dldata_type.bits - << ", " << dldata_type.lanes << "}"; + LOG(FATAL) << 
"Unknown DLDataType{" << dldata_type.code << ", " << dldata_type.bits << ", " + << dldata_type.lanes << "}"; return mshadow::kFloat32; } inline void SetDLTensor(int dev_mask, int dev_id) { - dltensor_.data = dptr_; - dltensor_.ctx = DLContext{static_cast(dev_mask), dev_id}; - dltensor_.ndim = shape_.ndim(); - dltensor_.dtype = DTypeTransform(type_flag_); - dltensor_.shape = shape_.data(); - dltensor_.strides = nullptr; + dltensor_.data = dptr_; + dltensor_.ctx = DLContext{static_cast(dev_mask), dev_id}; + dltensor_.ndim = shape_.ndim(); + dltensor_.dtype = DTypeTransform(type_flag_); + dltensor_.shape = shape_.data(); + dltensor_.strides = nullptr; dltensor_.byte_offset = 0; } @@ -462,22 +480,21 @@ DMLC_DECLARE_TYPE_NAME(nnvm::Tuple>, "Shape(tuple)"); namespace parameter { -template<> -class FieldEntry - : public FieldEntryBase, mxnet::TShape> { +template <> +class FieldEntry : public FieldEntryBase, mxnet::TShape> { public: FieldEntry() : enforce_nonzero_(false), expect_ndim_(0) {} // parent class typedef FieldEntryBase, mxnet::TShape> Parent; - virtual void Check(void *head) const { + virtual void Check(void* head) const { Parent::Check(head); - mxnet::TShape &v = this->Get(head); + mxnet::TShape& v = this->Get(head); if (expect_ndim_ != 0 && v.ndim() != expect_ndim_) { std::ostringstream os; - os << "value " << v << "for Parameter " << this->key_ - << " has wrong dimensions, expected dimension=" << expect_ndim_; - throw dmlc::ParamError(os.str()); + os << "value " << v << "for Parameter " << this->key_ + << " has wrong dimensions, expected dimension=" << expect_ndim_; + throw dmlc::ParamError(os.str()); } if (enforce_nonzero_) { for (int i = 0; i < v.ndim(); ++i) { @@ -490,11 +507,11 @@ class FieldEntry } } } - inline FieldEntry &enforce_nonzero() { + inline FieldEntry& enforce_nonzero() { this->enforce_nonzero_ = true; return this->self(); } - inline FieldEntry &set_expect_ndim(int ndim) { + inline FieldEntry& set_expect_ndim(int ndim) { expect_ndim_ = ndim; return this->self(); } diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index 798622b6ee2a..9fe30c0967a0 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -53,14 +53,14 @@ namespace mxnet { * \tparam ValueType The type of data stored inside tuple. * \sa TShape */ -template +template class Tuple { public: /*! \brief default constructor */ Tuple() = default; /*! \brief destructor */ inline ~Tuple() { - delete [] data_heap_; + delete[] data_heap_; } /*! * constructor to construct a tuple with all `value`. @@ -103,7 +103,7 @@ class Tuple { * \param src the source shape */ - inline Tuple(Tuple&& src) { // NOLINT(runtime/explicit) + inline Tuple(Tuple&& src) { // NOLINT(runtime/explicit) this->swap(src); } /*! 
@@ -112,9 +112,8 @@ class Tuple { * \param end end the end of the iterator * \tparam RandomAccessIterator iterator type */ - template - inline Tuple(RandomAccessIterator begin, - RandomAccessIterator end) { + template + inline Tuple(RandomAccessIterator begin, RandomAccessIterator end) { this->assign(begin, end); } @@ -133,9 +132,8 @@ class Tuple { * \param end end the end of the iterator * \tparam RandomAccessIterator iterator type */ - template - inline void assign(RandomAccessIterator begin, - RandomAccessIterator end) { + template + inline void assign(RandomAccessIterator begin, RandomAccessIterator end) { this->SetDim(end - begin); CHECK_GE(ndim(), 0); std::copy(begin, end, this->begin()); @@ -177,7 +175,7 @@ class Tuple { * \param init the source initializer list * \return reference of self */ - inline Tuple &operator=(std::initializer_list init) { + inline Tuple& operator=(std::initializer_list init) { this->assign(init.begin(), init.end()); return *this; } @@ -185,33 +183,35 @@ class Tuple { * \return whether two tuple equals * \param s the tuple to compare against */ - inline bool operator==(const Tuple &s) const { - if (ndim_ != s.ndim_) return false; - if (ndim() == -1) return true; + inline bool operator==(const Tuple& s) const { + if (ndim_ != s.ndim_) + return false; + if (ndim() == -1) + return true; return std::equal(begin(), end(), s.begin()); } /*! * \return whether two tuple not equal * \param s the tuple to compare against */ - inline bool operator!=(const Tuple &s) const { + inline bool operator!=(const Tuple& s) const { return !(*this == s); } /*! \return the begin data pointer to content of the tuple */ - inline const ValueType *begin() const { + inline const ValueType* begin() const { return ndim_ <= kStackCache ? data_stack_ : data_heap_; } /*! \return the begin data pointer to content of the tuple */ - inline ValueType *begin() { + inline ValueType* begin() { return ndim_ <= kStackCache ? data_stack_ : data_heap_; } /*! \return the data pointer to end of the tuple */ inline const ValueType* end() const { - return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_); + return ndim_ <= kStackCache ? (data_stack_ + ndim_) : (data_heap_ + ndim_); } /*! \return the data pointer to end the tuple */ inline ValueType* end() { - return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_); + return ndim_ <= kStackCache ? (data_stack_ + ndim_) : (data_heap_ + ndim_); } /*! \return number of dimension of the tuple */ inline int ndim() const { @@ -223,12 +223,12 @@ class Tuple { * \return the corresponding dimension size */ inline ValueType& operator[](int i) { - // it fixes the false alarm of assuming signed overflow does not occur - // when assuming that (X - c) > X is always false [-Werror=strict-overflow] - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wstrict-overflow" +// it fixes the false alarm of assuming signed overflow does not occur +// when assuming that (X - c) > X is always false [-Werror=strict-overflow] +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-overflow" CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")"; - #pragma GCC diagnostic pop +#pragma GCC diagnostic pop return begin()[i]; } /*! 
@@ -237,12 +237,12 @@ class Tuple { * \return the corresponding dimension size */ inline const ValueType& operator[](int i) const { - // it fixes the false alarm of assuming signed overflow does not occur - // when assuming that (X - c) > X is always false [-Werror=strict-overflow] - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wstrict-overflow" +// it fixes the false alarm of assuming signed overflow does not occur +// when assuming that (X - c) > X is always false [-Werror=strict-overflow] +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-overflow" CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")"; - #pragma GCC diagnostic pop +#pragma GCC diagnostic pop return begin()[i]; } /*! @@ -268,7 +268,7 @@ class Tuple { * \param t the tuple * \return the ostream */ - friend std::ostream &operator<<(std::ostream &os, const Tuple &t) { + friend std::ostream& operator<<(std::ostream& os, const Tuple& t) { if (t.ndim() == -1) { // If t is an unknown shape, return string "None". // This is consistent with returning unknown shape in Python and generating @@ -278,9 +278,10 @@ class Tuple { } os << '['; const ValueType* begin = t.begin(); - const ValueType* end = t.end(); + const ValueType* end = t.end(); for (const ValueType* it = begin; it != end; ++it) { - if (it != begin) os << ','; + if (it != begin) + os << ','; os << *it; } os << ']'; @@ -292,7 +293,7 @@ class Tuple { * \param t The tuple * \return the istream */ - friend std::istream &operator>>(std::istream &is, Tuple &t) { + friend std::istream& operator>>(std::istream& is, Tuple& t) { // get ( while (true) { char ch = is.peek(); @@ -304,7 +305,8 @@ class Tuple { return is; } is.get(); - if (ch == '(' || ch == '[') break; + if (ch == '(' || ch == '[') + break; if (!isspace(ch)) { if (ch == 'N') { std::string tmp_val; @@ -344,14 +346,17 @@ class Tuple { while (true) { ch = is.peek(); if (isspace(ch)) { - is.get(); continue; + is.get(); + continue; } if (ch == ')' || ch == ']') { - is.get(); break; + is.get(); + break; } break; } - if (ch == ')' || ch == ']') break; + if (ch == ')' || ch == ']') + break; } else if (ch == ')' || ch == ']') { break; } else { @@ -368,8 +373,8 @@ class Tuple { * \tparam DType data type that save to * \tparam TStream any stream type that have write */ - template - inline void Save(TStream *strm) const; + template + inline void Save(TStream* strm) const; /*! * \brief load the content from binary stream * \param strm the output stream @@ -377,8 +382,8 @@ class Tuple { * \tparam TStream any stream type that have write * \return whether the load is successful */ - template - inline bool Load(TStream *strm); + template + inline bool Load(TStream* strm); protected: // stack cache size @@ -394,21 +399,19 @@ class Tuple { // internal function to change the dimension inline void SetDim(int ndim) { CHECK_GE(ndim, -1) << "ndim cannot be less than -1, received " << ndim; - if (ndim > kStackCache && - ndim > num_heap_allocated_) { - delete [] data_heap_; - data_heap_ = new ValueType[ndim]; + if (ndim > kStackCache && ndim > num_heap_allocated_) { + delete[] data_heap_; + data_heap_ = new ValueType[ndim]; num_heap_allocated_ = ndim; } else if (ndim <= 0 && data_heap_ != nullptr) { - delete [] data_heap_; - data_heap_ = nullptr; + delete[] data_heap_; + data_heap_ = nullptr; num_heap_allocated_ = 0; } ndim_ = ndim; } }; - /*! brief check if a shape's ndim is known. 
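 * For example, ndim_is_known(-1) is false, while ndim_is_known(0) and
 * ndim_is_known(3) both hold; an ndim of -1 marks an unknown shape.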
*/ inline bool ndim_is_known(const int ndim) { CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim; @@ -455,7 +458,7 @@ class TShape : public Tuple { * \brief copy constructor of TShape * \param s source shape. */ - inline TShape(const Tuple& s) { // NOLINT(*) + inline TShape(const Tuple& s) { // NOLINT(*) if (s.ndim() == -1) { this->SetDim(-1); } else { @@ -484,16 +487,16 @@ class TShape : public Tuple { * \param end end the end of the iterator * \tparam RandomAccessIterator iterator type */ - template::iterator_category, - std::random_access_iterator_tag>::value, int>::type = 0> - inline TShape(RandomAccessIterator begin, - RandomAccessIterator end) { + template ::iterator_category, + std::random_access_iterator_tag>::value, + int>::type = 0> + inline TShape(RandomAccessIterator begin, RandomAccessIterator end) { this->assign(begin, end); } - inline explicit TShape(const ObjectRef& src): Tuple(src) {} + inline explicit TShape(const ObjectRef& src) : Tuple(src) {} /*! * \brief assignment function from tshape * \param src source shape. @@ -513,14 +516,14 @@ class TShape : public Tuple { * \return self. */ inline TShape& operator=(Tuple&& src) { // NOLINT(*) - TShape(std::move(src)).swap(*this); // NOLINT(*) + TShape(std::move(src)).swap(*this); // NOLINT(*) return *this; } /*! \return total number of elements in the shape */ inline size_t Size() const { CHECK(ndim_is_known(this->ndim())) << "Shape is unknown."; - dim_t size = 1; - const dim_t* start = begin(), *fin = end(); + dim_t size = 1; + const dim_t *start = begin(), *fin = end(); for (const dim_t* it = start; it != fin; ++it) { CHECK(dim_size_is_known(*it)) << "Shape dim size cannot be a negative value " << *it; size *= *it; @@ -535,10 +538,10 @@ class TShape : public Tuple { inline size_t ProdShape(int dimstart, int dimend) const { CHECK(ndim_is_known(this->ndim())) << "Shape is unknown."; CHECK_GE(dimstart, 0) << "dimstart must be >= 0, while received " << dimstart; - CHECK_LE(dimend, this->ndim()) << "dimend must be <= " << this->ndim() - << ", while received " << dimend; - dim_t num = 1; - const dim_t *d = this->data(); + CHECK_LE(dimend, this->ndim()) + << "dimend must be <= " << this->ndim() << ", while received " << dimend; + dim_t num = 1; + const dim_t* d = this->data(); for (int i = dimstart; i < dimend; ++i) { CHECK(dim_size_is_known(d[i])) << "Shape dim size must be known, while received " << d[i]; num *= d[i]; @@ -546,21 +549,21 @@ class TShape : public Tuple { return num; } /*! \return the begin data pointer to content of the tuple */ - inline const dim_t *data() const { + inline const dim_t* data() const { return begin(); } /*! \return the begin data pointer to content of the tuple */ - inline dim_t *data() { + inline dim_t* data() { return begin(); } #ifdef MSHADOW_XINLINE - template - inline TShape(const mshadow::Shape &s) {// NOLINT(*) + template + inline TShape(const mshadow::Shape& s) { // NOLINT(*) this->assign(s.shape_, s.shape_ + dim); } - template - inline TShape(mshadow::Shape &&s) {// NOLINT(*) + template + inline TShape(mshadow::Shape&& s) { // NOLINT(*) this->assign(s.shape_, s.shape_ + dim); } /*! 
@@ -569,8 +572,8 @@ class TShape : public Tuple { * \tparam dim shape dimension * \return reference of self */ - template - inline TShape &operator=(const mshadow::Shape &shape) { + template + inline TShape& operator=(const mshadow::Shape& shape) { this->assign(shape.shape_, shape.shape_ + dim); return *this; } @@ -579,11 +582,10 @@ class TShape : public Tuple { * \return the shape requested * \tparam dim dimension of the tensor */ - template + template inline mshadow::Shape get() const { - CHECK_EQ(dim, ndim()) - << "dimension do not match target dimension " << dim << " vs " << ndim(); - const dim_t *d = this->data(); + CHECK_EQ(dim, ndim()) << "dimension do not match target dimension " << dim << " vs " << ndim(); + const dim_t* d = this->data(); mshadow::Shape s; for (int i = 0; i < dim; ++i) { s[i] = d[i]; @@ -597,10 +599,11 @@ class TShape : public Tuple { inline mshadow::Shape<2> FlatTo2D(void) const { mshadow::Shape<2> s; CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim"; - if (ndim() == 0) return mshadow::Shape2(1, 1); - const dim_t *d = this->data(); - s.shape_[1] = d[ndim() - 1]; - dim_t ymax = 1; + if (ndim() == 0) + return mshadow::Shape2(1, 1); + const dim_t* d = this->data(); + s.shape_[1] = d[ndim() - 1]; + dim_t ymax = 1; for (int i = 1; i < ndim(); ++i) { ymax *= d[i - 1]; } @@ -617,11 +620,12 @@ class TShape : public Tuple { CHECK(axis_end >= axis_begin); mshadow::Shape<3> s; CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim"; - if (ndim() == 0) return mshadow::Shape3(1, 1, 1); - const dim_t *d = this->data(); - s.shape_[0] = 1; - s.shape_[1] = 1; - s.shape_[2] = 1; + if (ndim() == 0) + return mshadow::Shape3(1, 1, 1); + const dim_t* d = this->data(); + s.shape_[0] = 1; + s.shape_[1] = 1; + s.shape_[2] = 1; for (int i = 0; i < axis_begin; ++i) { s.shape_[0] *= d[i]; @@ -642,11 +646,12 @@ class TShape : public Tuple { inline mshadow::Shape<3> FlatTo3D(int axis) const { return FlatTo3D(axis, axis); } - inline bool operator==(const TShape &s) const { - if (ndim() != s.ndim()) return false; + inline bool operator==(const TShape& s) const { + if (ndim() != s.ndim()) + return false; return std::equal(begin(), end(), s.begin()); } - inline bool operator!=(const TShape &s) const { + inline bool operator!=(const TShape& s) const { return !(*this == s); } /*! @@ -654,12 +659,14 @@ class TShape : public Tuple { * \param s the shape to compare against * \tparam dim dimension of the shape */ - template - inline bool operator==(const mshadow::Shape &s) const { - if (ndim_ != dim) return false; - const dim_t *d = dim <= kStackCache ? data_stack_ : data_heap_; + template + inline bool operator==(const mshadow::Shape& s) const { + if (ndim_ != dim) + return false; + const dim_t* d = dim <= kStackCache ? data_stack_ : data_heap_; for (size_t i = 0; i < dim; ++i) { - if (d[i] != s.shape_[i]) return false; + if (d[i] != s.shape_[i]) + return false; } return true; } @@ -668,8 +675,8 @@ class TShape : public Tuple { * \param s the shape to compare against * \tparam dim dimension of the shape */ - template - inline bool operator!=(const mshadow::Shape &s) const { + template + inline bool operator!=(const mshadow::Shape& s) const { return !(*this == s); } #endif @@ -690,25 +697,26 @@ inline bool dim_size_is_known(const TShape& x, const int idx) { /*! brief check if shape is known using the NumPy compatible definition. * zero-dim and zero-size tensors are valid. 
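 * For example, shape_is_known(TShape({2, 0, 3})) holds under this
 * definition, while any shape containing a -1 dimension does not;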
-1 means unknown.*/ inline bool shape_is_known(const TShape& x) { - if (!ndim_is_known(x)) return false; + if (!ndim_is_known(x)) + return false; for (int i = 0; i < x.ndim(); ++i) { - if (!dim_size_is_known(x, i)) return false; + if (!dim_size_is_known(x, i)) + return false; } return true; } inline bool shape_is_known(const std::vector& shapes) { for (const TShape& shape : shapes) { - if (!shape_is_known(shape)) return false; + if (!shape_is_known(shape)) + return false; } return true; } /*! \brief helper function to cast type of container elements */ -template -inline DstIter ShapeTypeCast(const SrcIter begin, - const SrcIter end, - DstIter dst_begin) { +template +inline DstIter ShapeTypeCast(const SrcIter begin, const SrcIter end, DstIter dst_begin) { typedef typename std::iterator_traits::value_type SrcDType; typedef typename std::iterator_traits::value_type DstDType; auto cast = [](const SrcDType& dim) { return static_cast(dim); }; @@ -716,7 +724,7 @@ inline DstIter ShapeTypeCast(const SrcIter begin, } /*! \brief helper function to transform a container to TShape with type cast */ -template +template inline TShape ShapeTypeCast(const SrcIter begin, const SrcIter end) { size_t ndim = std::distance(begin, end); TShape res(ndim, -1); @@ -725,9 +733,9 @@ inline TShape ShapeTypeCast(const SrcIter begin, const SrcIter end) { } /*! \tparam ValueType The type of data stored inside tuple. */ -template -template -inline void Tuple::Save(TStream *strm) const { +template +template +inline void Tuple::Save(TStream* strm) const { strm->Write(&ndim_, sizeof(ndim_)); if (typeid(DType) == typeid(ValueType)) { strm->Write(begin(), sizeof(ValueType) * ndim_); @@ -739,17 +747,20 @@ inline void Tuple::Save(TStream *strm) const { } /*! \tparam ValueType The type of data stored inside tuple. */ -template -template -inline bool Tuple::Load(TStream *strm) { - if (strm->Read(&ndim_, sizeof(ndim_)) != sizeof(ndim_)) return false; +template +template +inline bool Tuple::Load(TStream* strm) { + if (strm->Read(&ndim_, sizeof(ndim_)) != sizeof(ndim_)) + return false; this->SetDim(ndim_); size_t nread = sizeof(DType) * ndim_; if (typeid(DType) == typeid(ValueType)) { - if (strm->Read(begin(), nread) != nread) return false; + if (strm->Read(begin(), nread) != nread) + return false; } else { std::vector buffer(ndim_); - if (strm->Read(buffer.data(), nread) != nread) return false; + if (strm->Read(buffer.data(), nread) != nread) + return false; ShapeTypeCast(buffer.begin(), buffer.end(), begin()); } return true; @@ -759,8 +770,8 @@ inline bool Tuple::Load(TStream *strm) { namespace std { /*! \brief hash function for Tuple. */ -template -struct hash > { +template +struct hash> { /*! \brief hash a Tuple into unsigned int */ size_t operator()(const mxnet::Tuple& val) const { std::hash hash_int; @@ -773,7 +784,7 @@ struct hash > { }; /*! \brief hash function for TShape. */ -template<> +template <> struct hash { /*! 
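 * A usage sketch (illustrative only; the brace-initialized TShape is an
 * assumption, not part of this patch):
 *
 * \code
 *
 * std::unordered_map<mxnet::TShape, int> shape_cache;
 * shape_cache[mxnet::TShape({2, 3})] = 1;  // keyed via std::hash<mxnet::TShape>
 *
 * \endcode
 *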
\brief hash a TShape into unsigned int */ size_t operator()(const mxnet::TShape& val) const { @@ -793,8 +804,8 @@ DMLC_DECLARE_TYPE_NAME(optional, "Shape or None"); DMLC_DECLARE_TYPE_NAME(optional>, "Shape or None"); // avoid low version of MSVC #if !(defined(_MSC_VER) && _MSC_VER < 1900) -template -struct type_name_helper > { +template +struct type_name_helper> { static inline std::string value() { return "tuple of <" + type_name() + ">"; } From 76973336bd59f88ce4c06a8a4656d2bc3ecd3b8f Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:01:34 +0100 Subject: [PATCH 04/10] [CPP-PACKAGE] Re-format .cc .h files --- .../multi_threaded_inference.cc | 152 ++-- cpp-package/example/utils.h | 56 +- cpp-package/include/mxnet-cpp/base.h | 16 +- cpp-package/include/mxnet-cpp/contrib.h | 135 ++-- cpp-package/include/mxnet-cpp/executor.h | 143 ++-- cpp-package/include/mxnet-cpp/initializer.h | 81 +-- cpp-package/include/mxnet-cpp/io.h | 69 +- cpp-package/include/mxnet-cpp/kvstore.h | 20 +- cpp-package/include/mxnet-cpp/lr_scheduler.h | 45 +- cpp-package/include/mxnet-cpp/metric.h | 41 +- cpp-package/include/mxnet-cpp/model.h | 19 +- cpp-package/include/mxnet-cpp/ndarray.h | 683 +++++++++--------- cpp-package/include/mxnet-cpp/op_map.h | 76 +- cpp-package/include/mxnet-cpp/op_suppl.h | 122 ++-- cpp-package/include/mxnet-cpp/op_util.h | 14 +- cpp-package/include/mxnet-cpp/operator.h | 186 ++--- cpp-package/include/mxnet-cpp/optimizer.h | 80 +- cpp-package/include/mxnet-cpp/shape.h | 351 +++++---- cpp-package/include/mxnet-cpp/symbol.h | 362 +++++----- 19 files changed, 1332 insertions(+), 1319 deletions(-) diff --git a/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc b/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc index e3b3909a609c..9b279e9c4315 100644 --- a/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc +++ b/cpp-package/example/inference/multi_threaded_inference/multi_threaded_inference.cc @@ -20,7 +20,7 @@ /*! 
* \file multi_threaded_inference.cc * \brief Multi Threaded inference example with CachedOp -*/ + */ #include @@ -37,17 +37,14 @@ const float DEFAULT_MEAN = 117.0; - // Code to load image, PrintOutput results, helper functions for the same obtained from: // https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/predict-cpp/ -static std::string trim(const std::string &input) { +static std::string trim(const std::string& input) { auto not_space = [](int ch) { return !std::isspace(ch); }; - auto output = input; - output.erase(output.begin(), - std::find_if(output.begin(), output.end(), not_space)); - output.erase(std::find_if(output.rbegin(), output.rend(), not_space).base(), - output.end()); + auto output = input; + output.erase(output.begin(), std::find_if(output.begin(), output.end(), not_space)); + output.erase(std::find_if(output.rbegin(), output.rend(), not_space).base(), output.end()); return output; } @@ -77,24 +74,25 @@ void PrintOutputResult(const float* data, size_t size, const std::vector best_accuracy) { best_accuracy = data[i]; - best_idx = i; + best_idx = i; } } - std::cout << "Best Result: " << trim(synset[best_idx]) << " (id=" << best_idx << ", " << - "accuracy=" << std::setprecision(8) << best_accuracy << ")" << std::endl; + std::cout << "Best Result: " << trim(synset[best_idx]) << " (id=" << best_idx << ", " + << "accuracy=" << std::setprecision(8) << best_accuracy << ")" << std::endl; } - // Read Image data into a float array -void GetImageFile(const std::string &image_file, float *image_data, - int channels, cv::Size resize_size) { +void GetImageFile(const std::string& image_file, + float* image_data, + int channels, + cv::Size resize_size) { // Read all kinds of file into a BGR color 3 channels image cv::Mat im_ori = cv::imread(image_file, cv::IMREAD_COLOR); @@ -127,17 +125,17 @@ void GetImageFile(const std::string &image_file, float *image_data, } } -void prepare_input_data(const mxnet::cpp::Shape& shape, const mxnet::cpp::Context& ctx, +void prepare_input_data(const mxnet::cpp::Shape& shape, + const mxnet::cpp::Context& ctx, int num_threads, std::vector* data_arr, bool random_uniform = false) { for (size_t i = 0; i < num_threads; ++i) { data_arr->emplace_back(shape, ctx, false, 0); int begin = i * 100; - int end = begin + 100; + int end = begin + 100; if (random_uniform) { - mxnet::cpp::Operator("_random_uniform")(begin, end) - .Invoke((*data_arr)[i]); + mxnet::cpp::Operator("_random_uniform")(begin, end).Invoke((*data_arr)[i]); } mxnet::cpp::NDArray::WaitAll(); } @@ -146,46 +144,48 @@ void prepare_input_data(const mxnet::cpp::Shape& shape, const mxnet::cpp::Contex // Run inference on a model void run_inference(const std::string& model_name, const std::vector& input_arrs, - std::vector *output_mx_arr, - int num_inf_per_thread = 1, bool random_sleep = false, - int num_threads = 1, bool static_alloc = false, - bool static_shape = false, - bool is_gpu = false) { - LOG(INFO) << "Running inference for " + model_name + - " num_threads: " + std::to_string(num_threads) + - " num_inf_per_thread: " + std::to_string(num_inf_per_thread) + - " random_sleep: " + std::to_string(random_sleep) + - " static_alloc: " + std::to_string(static_alloc) + - " static_shape: " + std::to_string(static_shape); - std::string json_file = model_name + "-symbol.json"; - std::string param_file = model_name + "-0000.params"; - auto out = mxnet::cpp::Symbol::Load(json_file); + std::vector* output_mx_arr, + int num_inf_per_thread = 1, + bool random_sleep = false, + int 
num_threads = 1, + bool static_alloc = false, + bool static_shape = false, + bool is_gpu = false) { + LOG(INFO) << "Running inference for " + model_name + + " num_threads: " + std::to_string(num_threads) + + " num_inf_per_thread: " + std::to_string(num_inf_per_thread) + + " random_sleep: " + std::to_string(random_sleep) + + " static_alloc: " + std::to_string(static_alloc) + + " static_shape: " + std::to_string(static_shape); + std::string json_file = model_name + "-symbol.json"; + std::string param_file = model_name + "-0000.params"; + auto out = mxnet::cpp::Symbol::Load(json_file); std::string static_alloc_str = static_alloc ? "true" : "false"; std::string static_shape_str = static_shape ? "true" : "false"; // Prepare context -# if MXNET_USE_CUDA == 1 +#if MXNET_USE_CUDA == 1 mxnet::Context backend_ctx; mxnet::cpp::Context ctx = mxnet::cpp::Context::cpu(0); if (is_gpu) { backend_ctx = mxnet::Context::GPU(0); - ctx = mxnet::cpp::Context::gpu(0); + ctx = mxnet::cpp::Context::gpu(0); } else { backend_ctx = mxnet::Context::CPU(0); - ctx = mxnet::cpp::Context::cpu(0); + ctx = mxnet::cpp::Context::cpu(0); } -# else +#else mxnet::Context backend_ctx = mxnet::Context::CPU(0); - mxnet::cpp::Context ctx = mxnet::cpp::Context::cpu(0); + mxnet::cpp::Context ctx = mxnet::cpp::Context::cpu(0); #endif // Prepare input data and parameters std::vector data_arr(num_threads); std::vector softmax_arr; std::vector params; - mxnet::cpp::Shape data_shape = mxnet::cpp::Shape(1, 3, 224, 224); + mxnet::cpp::Shape data_shape = mxnet::cpp::Shape(1, 3, 224, 224); mxnet::cpp::Shape softmax_shape = mxnet::cpp::Shape(1); - int num_inputs = out.ListInputs().size(); + int num_inputs = out.ListInputs().size(); for (size_t i = 0; i < data_arr.size(); ++i) { data_arr[i] = input_arrs[i].Copy(ctx); @@ -207,16 +207,15 @@ void run_inference(const std::string& model_name, CachedOpHandle hdl = CachedOpHandle(); - std::vector flag_keys{"data_indices", "param_indices", - "static_alloc", "static_shape"}; + std::vector flag_keys{ + "data_indices", "param_indices", "static_alloc", "static_shape"}; std::string param_indices = "["; for (size_t i = 1; i < num_inputs; ++i) { param_indices += std::to_string(i); param_indices += std::string(", "); } param_indices += "]"; - std::vector flag_vals{"[0]", param_indices, static_alloc_str, - static_shape_str}; + std::vector flag_vals{"[0]", param_indices, static_alloc_str, static_shape_str}; std::vector flag_key_cstrs, flag_val_cstrs; flag_key_cstrs.reserve(flag_keys.size()); for (size_t i = 0; i < flag_keys.size(); ++i) { @@ -226,15 +225,14 @@ void run_inference(const std::string& model_name, flag_val_cstrs.emplace_back(flag_vals[i].c_str()); } - int ret1 = MXCreateCachedOp(out.GetHandle(), flag_keys.size(), - flag_key_cstrs.data(), flag_val_cstrs.data(), - &hdl, true); + int ret1 = MXCreateCachedOp( + out.GetHandle(), flag_keys.size(), flag_key_cstrs.data(), flag_val_cstrs.data(), &hdl, true); if (ret1 < 0) { LOG(FATAL) << MXGetLastError(); } // Prepare data structures and lambda to run in different threads - std::vector cached_op_handles(num_threads); + std::vector cached_op_handles(num_threads); std::vector> arr_handles(num_threads); for (size_t i = 0; i < num_threads; ++i) { @@ -255,32 +253,37 @@ void run_inference(const std::string& model_name, std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); } int num_output = 0; - const int *stypes; - int ret = MXInvokeCachedOp(hdl, arr_handles[num].size(), arr_handles[num].data(), - ctx.GetDeviceType(), 0, &num_output, - 
&(cached_op_handles[num]), &stypes); + const int* stypes; + int ret = MXInvokeCachedOp(hdl, + arr_handles[num].size(), + arr_handles[num].data(), + ctx.GetDeviceType(), + 0, + &num_output, + &(cached_op_handles[num]), + &stypes); if (ret < 0) { LOG(FATAL) << MXGetLastError(); } - (*output_mx_arr)[num] = static_cast(*cached_op_handles[num]); + (*output_mx_arr)[num] = static_cast(*cached_op_handles[num]); }; // Spawn multiple threads, join and wait for threads to complete std::vector worker_threads(num_threads); int count = 0; - for (auto &&i : worker_threads) { + for (auto&& i : worker_threads) { i = std::thread(func, count); count++; } - for (auto &&i : worker_threads) { + for (auto&& i : worker_threads) { i.join(); } mxnet::cpp::NDArray::WaitAll(); std::string synset_file = "synset.txt"; - auto synset = LoadSynset(synset_file); + auto synset = LoadSynset(synset_file); std::vector tmp(num_threads); for (size_t i = 0; i < num_threads; i++) { tmp[i] = (*output_mx_arr)[i]->Copy(mxnet::Context::CPU(0)); @@ -288,8 +291,9 @@ void run_inference(const std::string& model_name, (*output_mx_arr)[i] = &tmp[i]; } for (size_t i = 0; i < num_threads; ++i) { - PrintOutputResult(static_cast((*output_mx_arr)[i]->data().dptr_), - (*output_mx_arr)[i]->shape().Size(), synset); + PrintOutputResult(static_cast((*output_mx_arr)[i]->data().dptr_), + (*output_mx_arr)[i]->shape().Size(), + synset); } int ret2 = MXFreeCachedOp(hdl); if (ret2 < 0) { @@ -298,11 +302,10 @@ void run_inference(const std::string& model_name, mxnet::cpp::NDArray::WaitAll(); } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { if (argc < 4) { std::cout << "Please provide a model name, is_gpu, test_image" << std::endl - << "Usage: ./multi_threaded_inference [model_name] [is_gpu] [file_names]" - << std::endl + << "Usage: ./multi_threaded_inference [model_name] [is_gpu] [file_names]" << std::endl << "Example: ./.multi_threaded_inference imagenet1k-inception-bn 0 apple.jpg" << std::endl << "NOTE: Thread number ordering will be based on the ordering of file inputs" @@ -311,21 +314,20 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } std::string model_name = std::string(argv[1]); - bool is_gpu = std::atoi(argv[2]); + bool is_gpu = std::atoi(argv[2]); CHECK(argc >= 4) << "Number of files provided should be atleast 1"; int num_threads = argc - 3; std::vector test_files; for (size_t i = 0; i < argc - 3; ++i) { test_files.emplace_back(argv[3 + i]); } - int epoch = 0; + int epoch = 0; bool static_alloc = true; bool static_shape = true; - // Image size and channels - size_t width = 224; - size_t height = 224; + size_t width = 224; + size_t height = 224; size_t channels = 3; size_t image_size = width * height * channels; @@ -337,18 +339,24 @@ int main(int argc, char *argv[]) { mxnet::cpp::Shape input_shape = mxnet::cpp::Shape(1, 3, 224, 224); for (size_t i = 0; i < files.size(); i++) { files[i].resize(image_size); - GetImageFile(test_files[i], files[i].data(), channels, - cv::Size(width, height)); - input_arrs.emplace_back(mxnet::cpp::NDArray(files[i].data(), - input_shape, mxnet::cpp::Context::cpu(0))); + GetImageFile(test_files[i], files[i].data(), channels, cv::Size(width, height)); + input_arrs.emplace_back( + mxnet::cpp::NDArray(files[i].data(), input_shape, mxnet::cpp::Context::cpu(0))); } // load symbol std::string static_alloc_str = static_alloc ? "true" : "false"; std::string static_shape_str = static_shape ? 
"true" : "false"; std::vector output_mx_arr(num_threads); - run_inference(model_name, input_arrs, &output_mx_arr, 1, false, num_threads, - static_alloc, static_shape, is_gpu); + run_inference(model_name, + input_arrs, + &output_mx_arr, + 1, + false, + num_threads, + static_alloc, + static_shape, + is_gpu); mxnet::cpp::NDArray::WaitAll(); return 0; diff --git a/cpp-package/example/utils.h b/cpp-package/example/utils.h index 87847701ce6e..887a807e5d12 100644 --- a/cpp-package/example/utils.h +++ b/cpp-package/example/utils.h @@ -27,50 +27,52 @@ using namespace mxnet::cpp; -#define TRY \ - try { -#define CATCH \ - } catch(dmlc::Error &err) { \ - LG << "Status: FAIL";\ +#define TRY try { +#define CATCH \ + } \ + catch (dmlc::Error & err) { \ + LG << "Status: FAIL"; \ LG << "With Error: " << MXGetLastError(); \ - return 1; \ + return 1; \ } -bool isFileExists(const std::string &filename) { +bool isFileExists(const std::string& filename) { std::ifstream fhandle(filename.c_str()); return fhandle.good(); } -bool check_datafiles(const std::vector &data_files) { - for (size_t index=0; index < data_files.size(); index++) { +bool check_datafiles(const std::vector& data_files) { + for (size_t index = 0; index < data_files.size(); index++) { if (!(isFileExists(data_files[index]))) { - LG << "Error: File does not exist: "<< data_files[index]; + LG << "Error: File does not exist: " << data_files[index]; return false; } } return true; } -bool setDataIter(MXDataIter *iter , const std::string &useType, - const std::vector &data_files, int batch_size) { - if (!check_datafiles(data_files)) { - return false; - } +bool setDataIter(MXDataIter* iter, + const std::string& useType, + const std::vector& data_files, + int batch_size) { + if (!check_datafiles(data_files)) { + return false; + } - iter->SetParam("batch_size", batch_size); - iter->SetParam("shuffle", 1); - iter->SetParam("flat", 1); + iter->SetParam("batch_size", batch_size); + iter->SetParam("shuffle", 1); + iter->SetParam("flat", 1); - if (useType == "Train") { - iter->SetParam("image", data_files[0]); - iter->SetParam("label", data_files[1]); - } else if (useType == "Label") { - iter->SetParam("image", data_files[2]); - iter->SetParam("label", data_files[3]); - } + if (useType == "Train") { + iter->SetParam("image", data_files[0]); + iter->SetParam("label", data_files[1]); + } else if (useType == "Label") { + iter->SetParam("image", data_files[2]); + iter->SetParam("label", data_files[3]); + } - iter->CreateDataIter(); - return true; + iter->CreateDataIter(); + return true; } #endif // CPP_PACKAGE_EXAMPLE_UTILS_H_ diff --git a/cpp-package/include/mxnet-cpp/base.h b/cpp-package/include/mxnet-cpp/base.h index 19375c0f81e8..ad1ab02c9619 100644 --- a/cpp-package/include/mxnet-cpp/base.h +++ b/cpp-package/include/mxnet-cpp/base.h @@ -18,10 +18,10 @@ */ /*! -* \file base.h -* \brief base definitions for mxnetcpp -* \author Chuntao Hong, Zhang Chen -*/ + * \file base.h + * \brief base definitions for mxnetcpp + * \author Chuntao Hong, Zhang Chen + */ #ifndef MXNET_CPP_BASE_H_ #define MXNET_CPP_BASE_H_ @@ -41,10 +41,10 @@ enum OpReqType { /*! \brief write gradient to provided space */ kWriteTo, /*! - * \brief perform an inplace write, - * Target shares memory with one of input arguments. - * This option only happen when - */ + * \brief perform an inplace write, + * Target shares memory with one of input arguments. + * This option only happen when + */ kWriteInplace, /*! 
\brief add to the provided space */ kAddTo diff --git a/cpp-package/include/mxnet-cpp/contrib.h b/cpp-package/include/mxnet-cpp/contrib.h index 21ca54014109..c6ca3b834b14 100644 --- a/cpp-package/include/mxnet-cpp/contrib.h +++ b/cpp-package/include/mxnet-cpp/contrib.h @@ -18,10 +18,10 @@ */ /*! -* \file contrib.h -* \brief utility function to enable some contrib features -* \author Haohuan Wang -*/ + * \file contrib.h + * \brief utility function to enable some contrib features + * \author Haohuan Wang + */ #ifndef MXNET_CPP_CONTRIB_H_ #define MXNET_CPP_CONTRIB_H_ @@ -35,76 +35,79 @@ namespace mxnet { namespace cpp { namespace details { - /*! - * split a string with the given delimiter - * @param str string to be parsed - * @param delimiter delimiter - * @return delimited list of string - */ - inline std::vector split(const std::string& str, const std::string& delimiter) { - std::vector splitted; - size_t last = 0; - size_t next = 0; - while ((next = str.find(delimiter, last)) != std::string::npos) { - splitted.push_back(str.substr(last, next - last)); - last = next + 1; - } - splitted.push_back(str.substr(last)); - return splitted; +/*! + * split a string with the given delimiter + * @param str string to be parsed + * @param delimiter delimiter + * @return delimited list of string + */ +inline std::vector split(const std::string& str, const std::string& delimiter) { + std::vector splitted; + size_t last = 0; + size_t next = 0; + while ((next = str.find(delimiter, last)) != std::string::npos) { + splitted.push_back(str.substr(last, next - last)); + last = next + 1; } + splitted.push_back(str.substr(last)); + return splitted; +} } // namespace details namespace contrib { - // needs to be same with - // https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190 - static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names"; - // needs to be same with - // https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244 - static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_"; - /*! 
- * this is a mimic to https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37 - * @param symbol symbol that already called subgraph api - * @param argParams original arg params, params needed by tensorrt will be removed after calling this function - * @param auxParams original aux params, params needed by tensorrt will be removed after calling this function - */ - inline void InitTensorRTParams(const mxnet::cpp::Symbol& symbol, - std::map *argParams, - std::map *auxParams) { - mxnet::cpp::Symbol internals = symbol.GetInternals(); - mx_uint numSymbol = internals.GetNumOutputs(); - for (mx_uint i = 0; i < numSymbol; ++i) { - std::map attrs = internals[i].ListAttributes(); - if (attrs.find(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER) != attrs.end()) { - std::string new_params_names; - std::map tensorrtParams; - std::vector keys = details::split( - attrs[TENSORRT_SUBGRAPH_PARAM_IDENTIFIER], ";"); - for (const auto& key : keys) { - if (argParams->find(key) != argParams->end()) { - new_params_names += key + ";"; - tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*argParams)[key]; - argParams->erase(key); - } else if (auxParams->find(key) != auxParams->end()) { - new_params_names += key + ";"; - tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*auxParams)[key]; - auxParams->erase(key); - } - } - std::map new_attrs = {}; - for (const auto& kv : tensorrtParams) { - // passing the ndarray address into TRT node attributes to get the weight - uint64_t address = reinterpret_cast(kv.second.GetHandle()); - new_attrs[kv.first] = std::to_string(address); - } - if (!new_attrs.empty()) { - internals[i].SetAttributes(new_attrs); - internals[i].SetAttribute(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER, - new_params_names.substr(0, new_params_names.length() - 1)); - } +// needs to be same with +// https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190 +static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names"; +// needs to be same with +// https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244 +static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_"; +/*! 
+ * this is a mimic to + * https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37 + * @param symbol symbol that already called subgraph api + * @param argParams original arg params, params needed by tensorrt will be removed after calling + * this function + * @param auxParams original aux params, params needed by tensorrt will be removed after calling + * this function + */ +inline void InitTensorRTParams(const mxnet::cpp::Symbol& symbol, + std::map* argParams, + std::map* auxParams) { + mxnet::cpp::Symbol internals = symbol.GetInternals(); + mx_uint numSymbol = internals.GetNumOutputs(); + for (mx_uint i = 0; i < numSymbol; ++i) { + std::map attrs = internals[i].ListAttributes(); + if (attrs.find(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER) != attrs.end()) { + std::string new_params_names; + std::map tensorrtParams; + std::vector keys = + details::split(attrs[TENSORRT_SUBGRAPH_PARAM_IDENTIFIER], ";"); + for (const auto& key : keys) { + if (argParams->find(key) != argParams->end()) { + new_params_names += key + ";"; + tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*argParams)[key]; + argParams->erase(key); + } else if (auxParams->find(key) != auxParams->end()) { + new_params_names += key + ";"; + tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*auxParams)[key]; + auxParams->erase(key); } + } + std::map new_attrs = {}; + for (const auto& kv : tensorrtParams) { + // passing the ndarray address into TRT node attributes to get the weight + uint64_t address = reinterpret_cast(kv.second.GetHandle()); + new_attrs[kv.first] = std::to_string(address); + } + if (!new_attrs.empty()) { + internals[i].SetAttributes(new_attrs); + internals[i].SetAttribute(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER, + new_params_names.substr(0, new_params_names.length() - 1)); + } } + } } } // namespace contrib diff --git a/cpp-package/include/mxnet-cpp/executor.h b/cpp-package/include/mxnet-cpp/executor.h index 9b413e1a60fd..fff559b79df3 100644 --- a/cpp-package/include/mxnet-cpp/executor.h +++ b/cpp-package/include/mxnet-cpp/executor.h @@ -18,10 +18,10 @@ */ /*! -* \file executor.h -* \brief executor definition -* \author Chuntao Hong, Zhang Chen -*/ + * \file executor.h + * \brief executor definition + * \author Chuntao Hong, Zhang Chen + */ #ifndef MXNET_CPP_EXECUTOR_H_ #define MXNET_CPP_EXECUTOR_H_ @@ -40,43 +40,51 @@ namespace cpp { class Optimizer; /*! -* \brief Executor interface -*/ + * \brief Executor interface + */ class Executor { public: - Executor(const Symbol &symbol, Context context, - const std::vector &arg_arrays, - const std::vector &grad_arrays, - const std::vector &grad_reqs, - const std::vector &aux_arrays, - const std::map &group_to_ctx = - std::map(), - Executor *shared_exec = nullptr); - explicit Executor(const CachedOpHandle &h) { handle_ = h; } + Executor(const Symbol& symbol, + Context context, + const std::vector& arg_arrays, + const std::vector& grad_arrays, + const std::vector& grad_reqs, + const std::vector& aux_arrays, + const std::map& group_to_ctx = std::map(), + Executor* shared_exec = nullptr); + explicit Executor(const CachedOpHandle& h) { + handle_ = h; + } /*! - * \brief Perform a Forward operation of Operator - * After this operation, user can get the result by using function head. - */ + * \brief Perform a Forward operation of Operator + * After this operation, user can get the result by using function head. 
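// Editor's sketch (not part of the original commit): how the reformatted
// InitTensorRTParams above is meant to be called, mirroring the Python
// contrib.tensorrt flow it mimics. The map value types were stripped in
// extraction; std::map<std::string, NDArray> is assumed for both parameter
// maps, and the function/variable names here are placeholders.
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

void PrepareTrtSymbol(const mxnet::cpp::Symbol& trt_sym,
                      std::map<std::string, mxnet::cpp::NDArray>* args,
                      std::map<std::string, mxnet::cpp::NDArray>* aux) {
  // Moves TensorRT-owned weights out of args/aux and records their NDArray
  // handles on the TRT subgraph node attributes.
  mxnet::cpp::contrib::InitTensorRTParams(trt_sym, args, aux);
  // args/aux now hold only the parameters still bound by the executor.
}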
+ */ void Forward(bool is_train) { std::vector arg_handles; - for (const auto &array : combined_arrays) { + for (const auto& array : combined_arrays) { arg_handles.push_back(array.GetHandle()); } - int prev_is_record = 0; + int prev_is_record = 0; int prev_train_mode = 0; CHECK_EQ(MXAutogradSetIsRecording(1, &prev_is_record), 0); if (is_train == true) { CHECK_EQ(MXAutogradSetIsTraining(1, &prev_train_mode), 0); } std::vector output_handles; - std::transform(outputs.begin(), outputs.end(), - std::back_inserter(output_handles), [](NDArray& a) { + std::transform( + outputs.begin(), outputs.end(), std::back_inserter(output_handles), [](NDArray& a) { return a.GetHandle(); }); - int out_size = 0; - NDArrayHandle *out_array = nullptr; - CHECK_EQ(MXInvokeCachedOp(handle_, arg_handles.size(), arg_handles.data(), - device_type, device_id, &out_size, &out_array, nullptr), + int out_size = 0; + NDArrayHandle* out_array = nullptr; + CHECK_EQ(MXInvokeCachedOp(handle_, + arg_handles.size(), + arg_handles.data(), + device_type, + device_id, + &out_size, + &out_array, + nullptr), 0); outputs.clear(); outputs.reserve(out_size); @@ -84,30 +92,29 @@ class Executor { outputs.push_back(NDArray(out_array[i])); } int cur_train_mode = prev_train_mode; - int cur_is_record = prev_is_record; + int cur_is_record = prev_is_record; if (is_train == true) { CHECK_EQ(MXAutogradSetIsTraining(cur_train_mode, &prev_train_mode), 0); } CHECK_EQ(MXAutogradSetIsRecording(cur_is_record, &prev_is_record), 0); } /*! - * \brief Perform a Backward operation of the Operator. - * This must be called after Forward. - * After this operation, NDArrays specified by grad_in_args_store will be - *updated accordingly. - * User is allowed to pass in an empty Array if the head node is - * loss function and head gradeitn is not needed. - * - * \param head_grads the gradient of head nodes to be backproped. - */ - void Backward(const std::vector &head_grads = - std::vector()) { + * \brief Perform a Backward operation of the Operator. + * This must be called after Forward. + * After this operation, NDArrays specified by grad_in_args_store will be + *updated accordingly. + * User is allowed to pass in an empty Array if the head node is + * loss function and head gradeitn is not needed. + * + * \param head_grads the gradient of head nodes to be backproped. 
+ */ + void Backward(const std::vector& head_grads = std::vector()) { if (require_grad == true) { if (outputs.size() == 0) { Forward(false); } std::vector out_handles; - for (const auto &array : outputs) { + for (const auto& array : outputs) { out_handles.push_back(array.GetHandle()); } std::vector head_grads_; @@ -115,17 +122,33 @@ class Executor { head_grads_.push_back(d.GetHandle()); } if (head_grads_.size() > 0) { - CHECK_EQ(MXAutogradBackwardEx(out_handles.size(), out_handles.data(), - head_grads_.data(), 0, nullptr, 0, 0, 1, - nullptr, nullptr), 0); + CHECK_EQ(MXAutogradBackwardEx(out_handles.size(), + out_handles.data(), + head_grads_.data(), + 0, + nullptr, + 0, + 0, + 1, + nullptr, + nullptr), + 0); } else { - CHECK_EQ(MXAutogradBackwardEx(out_handles.size(), out_handles.data(), - nullptr, 0, nullptr, 0, 0, 1, - nullptr, nullptr), 0); + CHECK_EQ(MXAutogradBackwardEx(out_handles.size(), + out_handles.data(), + nullptr, + 0, + nullptr, + 0, + 0, + 1, + nullptr, + nullptr), + 0); } grad_arrays.clear(); grad_arrays.reserve(arg_arrays.size()); - for (const auto &array : arg_arrays) { + for (const auto& array : arg_arrays) { NDArrayHandle grad; CHECK_EQ(MXNDArrayGetGrad(array.GetHandle(), &grad), 0); grad_arrays.push_back(NDArray(grad)); @@ -136,9 +159,11 @@ class Executor { // To implement reshape function void Reshape(); /*! - * \brief destructor, free the handle - */ - ~Executor() { MXFreeCachedOp(handle_); } + * \brief destructor, free the handle + */ + ~Executor() { + MXFreeCachedOp(handle_); + } std::vector arg_arrays; std::vector grad_arrays; std::vector aux_arrays; @@ -147,8 +172,8 @@ class Executor { int device_id; bool require_grad; /*! - * \brief arrays store the outputs of forward - */ + * \brief arrays store the outputs of forward + */ std::vector outputs; std::map arg_dict() { return GetDict(symbol_.ListArguments(), arg_arrays); @@ -161,21 +186,19 @@ class Executor { } private: - Executor(const Executor &e); - Executor &operator=(const Executor &e); + Executor(const Executor& e); + Executor& operator=(const Executor& e); CachedOpHandle handle_; Symbol symbol_; - std::map GetDict(const std::vector &names, - const std::vector &arrays) { + std::map GetDict(const std::vector& names, + const std::vector& arrays) { std::map ret; std::set name_set; - for (const auto &s : names) { - CHECK(name_set.find(s) == name_set.end()) << "Duplicate names detected, " - << s; + for (const auto& s : names) { + CHECK(name_set.find(s) == name_set.end()) << "Duplicate names detected, " << s; name_set.insert(s); } - CHECK_EQ(name_set.size(), arrays.size()) - << "names size not equal to arrays size"; + CHECK_EQ(name_set.size(), arrays.size()) << "names size not equal to arrays size"; for (size_t i = 0; i < names.size(); ++i) { ret[names[i]] = arrays[i]; } diff --git a/cpp-package/include/mxnet-cpp/initializer.h b/cpp-package/include/mxnet-cpp/initializer.h index 356591f8bf8e..5f509c2aa5a0 100644 --- a/cpp-package/include/mxnet-cpp/initializer.h +++ b/cpp-package/include/mxnet-cpp/initializer.h @@ -37,16 +37,12 @@ namespace cpp { class Initializer { public: - static bool StringStartWith(const std::string& name, - const std::string& check_str) { - return (name.size() >= check_str.size() && - name.substr(0, check_str.size()) == check_str); + static bool StringStartWith(const std::string& name, const std::string& check_str) { + return (name.size() >= check_str.size() && name.substr(0, check_str.size()) == check_str); } - static bool StringEndWith(const std::string& name, - const std::string& check_str) 
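// Editor's sketch (not part of the original commit): one training step
// against the reformatted Executor interface above. Symbol and optimizer
// setup are elided; exec is assumed to be a ready Executor* created with
// gradients required.
void TrainStep(mxnet::cpp::Executor* exec) {
  exec->Forward(true);      // records the graph for autograd
  exec->Backward();         // empty head_grads: heads are a loss function
  for (auto& grad : exec->grad_arrays) {
    grad.WaitToRead();      // gradients are valid after this point
  }
}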
{ + static bool StringEndWith(const std::string& name, const std::string& check_str) { return (name.size() >= check_str.size() && - name.substr(name.size() - check_str.size(), check_str.size()) == - check_str); + name.substr(name.size() - check_str.size(), check_str.size()) == check_str); } virtual void operator()(const std::string& name, NDArray* arr) { if (StringStartWith(name, "upsampling")) { @@ -84,20 +80,30 @@ class Initializer { virtual void InitBilinear(NDArray* arr) { Shape shape(arr->GetShape()); std::vector weight(shape.Size(), 0); - int f = std::ceil(shape[3] / 2.0); + int f = std::ceil(shape[3] / 2.0); float c = (2 * f - 1 - f % 2) / (2. * f); for (size_t i = 0; i < shape.Size(); ++i) { - int x = i % shape[3]; - int y = (i / shape[3]) % shape[2]; + int x = i % shape[3]; + int y = (i / shape[3]) % shape[2]; weight[i] = (1 - std::abs(x / f - c)) * (1 - std::abs(y / f - c)); } (*arr).SyncCopyFromCPU(weight); } - virtual void InitZero(NDArray* arr) { (*arr) = 0.0f; } - virtual void InitOne(NDArray* arr) { (*arr) = 1.0f; } - virtual void InitBias(NDArray* arr) { (*arr) = 0.0f; } - virtual void InitGamma(NDArray* arr) { (*arr) = 1.0f; } - virtual void InitBeta(NDArray* arr) { (*arr) = 0.0f; } + virtual void InitZero(NDArray* arr) { + (*arr) = 0.0f; + } + virtual void InitOne(NDArray* arr) { + (*arr) = 1.0f; + } + virtual void InitBias(NDArray* arr) { + (*arr) = 0.0f; + } + virtual void InitGamma(NDArray* arr) { + (*arr) = 1.0f; + } + virtual void InitBeta(NDArray* arr) { + (*arr) = 0.0f; + } virtual void InitWeight(NDArray* arr) {} virtual void InitQuantizedWeight(NDArray* arr) { std::default_random_engine generator; @@ -112,32 +118,30 @@ class Initializer { class Constant : public Initializer { public: - explicit Constant(float value) - : value(value) {} - void operator()(const std::string &name, NDArray *arr) override { + explicit Constant(float value) : value(value) {} + void operator()(const std::string& name, NDArray* arr) override { (*arr) = value; } + protected: float value; }; class Zero : public Constant { public: - Zero(): Constant(0.0f) {} + Zero() : Constant(0.0f) {} }; class One : public Constant { public: - One(): Constant(1.0f) {} + One() : Constant(1.0f) {} }; class Uniform : public Initializer { public: - explicit Uniform(float scale) - : Uniform(-scale, scale) {} - Uniform(float begin, float end) - : begin(begin), end(end) {} - void operator()(const std::string &name, NDArray *arr) override { + explicit Uniform(float scale) : Uniform(-scale, scale) {} + Uniform(float begin, float end) : begin(begin), end(end) {} + void operator()(const std::string& name, NDArray* arr) override { if (StringEndWith(name, "weight_quantize")) { InitQuantizedWeight(arr); return; @@ -148,15 +152,15 @@ class Uniform : public Initializer { } NDArray::SampleUniform(begin, end, arr); } + protected: float begin, end; }; class Normal : public Initializer { public: - Normal(float mu, float sigma) - : mu(mu), sigma(sigma) {} - void operator()(const std::string &name, NDArray *arr) override { + Normal(float mu, float sigma) : mu(mu), sigma(sigma) {} + void operator()(const std::string& name, NDArray* arr) override { if (StringEndWith(name, "weight_quantize")) { InitQuantizedWeight(arr); return; @@ -167,6 +171,7 @@ class Normal : public Initializer { } NDArray::SampleGaussian(mu, sigma, arr); } + protected: float mu, sigma; }; @@ -174,7 +179,7 @@ class Normal : public Initializer { class Bilinear : public Initializer { public: Bilinear() {} - void operator()(const std::string &name, NDArray *arr) 
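// Editor's sketch (not part of the original commit): the Initializer
// hierarchy above dispatches on the parameter name, so one object can fill
// weights and fall through to the Init* hooks for bias/gamma/beta names.
// The shape below is illustrative only.
void InitParamsExample() {
  using namespace mxnet::cpp;
  NDArray w(Shape(64, 3, 7, 7), Context::cpu(), /*delay_alloc=*/false);
  Uniform uniform(0.07f);        // samples from [-0.07, 0.07]
  uniform("conv0_weight", &w);
  Normal normal(0.0f, 0.01f);    // mu = 0, sigma = 0.01
  normal("fc1_weight", &w);
  Zero zero;                     // Constant(0.0f)
  zero("fc1_bias", &w);
}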
override { + void operator()(const std::string& name, NDArray* arr) override { if (StringEndWith(name, "weight_quantize")) { InitQuantizedWeight(arr); return; @@ -189,21 +194,13 @@ class Bilinear : public Initializer { class Xavier : public Initializer { public: - enum RandType { - gaussian, - uniform - } rand_type; - enum FactorType { - avg, - in, - out - } factor_type; + enum RandType { gaussian, uniform } rand_type; + enum FactorType { avg, in, out } factor_type; float magnitude; - Xavier(RandType rand_type = gaussian, FactorType factor_type = avg, - float magnitude = 3) + Xavier(RandType rand_type = gaussian, FactorType factor_type = avg, float magnitude = 3) : rand_type(rand_type), factor_type(factor_type), magnitude(magnitude) {} - void operator()(const std::string &name, NDArray* arr) override { + void operator()(const std::string& name, NDArray* arr) override { if (StringEndWith(name, "weight_quantize")) { InitQuantizedWeight(arr); return; diff --git a/cpp-package/include/mxnet-cpp/io.h b/cpp-package/include/mxnet-cpp/io.h index 09fa8061fef6..72441c84dadb 100644 --- a/cpp-package/include/mxnet-cpp/io.h +++ b/cpp-package/include/mxnet-cpp/io.h @@ -18,10 +18,10 @@ */ /*! -* \file operator.h -* \brief definition of io, such as DataIter -* \author Zhang Chen -*/ + * \file operator.h + * \brief definition of io, such as DataIter + * \author Zhang Chen + */ #ifndef MXNET_CPP_IO_H_ #define MXNET_CPP_IO_H_ @@ -36,9 +36,9 @@ namespace mxnet { namespace cpp { /*! -* \brief Default object for holding a mini-batch of data and related -* information. -*/ + * \brief Default object for holding a mini-batch of data and related + * information. + */ class DataBatch { public: NDArray data; @@ -48,17 +48,19 @@ class DataBatch { }; class DataIter { public: - virtual void BeforeFirst(void) = 0; - virtual bool Next(void) = 0; - virtual NDArray GetData(void) = 0; - virtual NDArray GetLabel(void) = 0; - virtual int GetPadNum(void) = 0; + virtual void BeforeFirst(void) = 0; + virtual bool Next(void) = 0; + virtual NDArray GetData(void) = 0; + virtual NDArray GetLabel(void) = 0; + virtual int GetPadNum(void) = 0; virtual std::vector GetIndex(void) = 0; DataBatch GetDataBatch() { return DataBatch{GetData(), GetLabel(), GetPadNum(), GetIndex()}; } - void Reset() { BeforeFirst(); } + void Reset() { + BeforeFirst(); + } virtual ~DataIter() = default; }; @@ -66,25 +68,29 @@ class DataIter { class MXDataIterMap { public: inline MXDataIterMap() { - mx_uint num_data_iter_creators = 0; - DataIterCreator *data_iter_creators = nullptr; + mx_uint num_data_iter_creators = 0; + DataIterCreator* data_iter_creators = nullptr; int r = MXListDataIters(&num_data_iter_creators, &data_iter_creators); CHECK_EQ(r, 0); for (mx_uint i = 0; i < num_data_iter_creators; i++) { - const char *name; - const char *description; + const char* name; + const char* description; mx_uint num_args; - const char **arg_names; - const char **arg_type_infos; - const char **arg_descriptions; - r = MXDataIterGetIterInfo(data_iter_creators[i], &name, &description, - &num_args, &arg_names, &arg_type_infos, + const char** arg_names; + const char** arg_type_infos; + const char** arg_descriptions; + r = MXDataIterGetIterInfo(data_iter_creators[i], + &name, + &description, + &num_args, + &arg_names, + &arg_type_infos, &arg_descriptions); CHECK_EQ(r, 0); mxdataiter_creators_[name] = data_iter_creators[i]; } } - inline DataIterCreator GetMXDataIterCreator(const std::string &name) { + inline DataIterCreator GetMXDataIterCreator(const std::string& name) { return 
mxdataiter_creators_[name]; } @@ -96,19 +102,21 @@ struct MXDataIterBlob { public: MXDataIterBlob() : handle_(nullptr) {} explicit MXDataIterBlob(DataIterHandle handle) : handle_(handle) {} - ~MXDataIterBlob() { MXDataIterFree(handle_); } + ~MXDataIterBlob() { + MXDataIterFree(handle_); + } DataIterHandle handle_; private: - MXDataIterBlob &operator=(const MXDataIterBlob &); + MXDataIterBlob& operator=(const MXDataIterBlob&); }; class MXDataIter : public DataIter { public: - explicit MXDataIter(const std::string &mxdataiter_type); - MXDataIter(const MXDataIter &other) { - creator_ = other.creator_; - params_ = other.params_; + explicit MXDataIter(const std::string& mxdataiter_type); + MXDataIter(const MXDataIter& other) { + creator_ = other.creator_; + params_ = other.params_; blob_ptr_ = other.blob_ptr_; } void BeforeFirst(); @@ -125,7 +133,7 @@ class MXDataIter : public DataIter { * \return reference of self */ template - MXDataIter &SetParam(const std::string &name, const T &value) { + MXDataIter& SetParam(const std::string& name, const T& value) { std::string value_str; std::stringstream ss; ss << value; @@ -145,4 +153,3 @@ class MXDataIter : public DataIter { } // namespace mxnet #endif // MXNET_CPP_IO_H_ - diff --git a/cpp-package/include/mxnet-cpp/kvstore.h b/cpp-package/include/mxnet-cpp/kvstore.h index 0080be1e7306..20267f73b4f7 100644 --- a/cpp-package/include/mxnet-cpp/kvstore.h +++ b/cpp-package/include/mxnet-cpp/kvstore.h @@ -18,10 +18,10 @@ */ /*! -* \file kvstore.h -* \brief definition of kvstore -* \author Chuntao Hong -*/ + * \file kvstore.h + * \brief definition of kvstore + * \author Chuntao Hong + */ #ifndef MXNET_CPP_KVSTORE_H_ #define MXNET_CPP_KVSTORE_H_ @@ -44,15 +44,17 @@ class KVStore { static void Push(int key, const NDArray& val, int priority = 0); static void Push(const std::string& key, const NDArray& val, int priority = 0); static void Push(const std::vector& keys, - const std::vector& vals, int priority = 0); + const std::vector& vals, + int priority = 0); static void Push(const std::vector& keys, - const std::vector& vals, int priority = 0); + const std::vector& vals, + int priority = 0); static void Pull(int key, NDArray* out, int priority = 0); static void Pull(const std::string& key, NDArray* out, int priority = 0); - static void Pull(const std::vector& keys, - std::vector* outs, int priority = 0); + static void Pull(const std::vector& keys, std::vector* outs, int priority = 0); static void Pull(const std::vector& keys, - std::vector* outs, int priority = 0); + std::vector* outs, + int priority = 0); // TODO(lx): put lr in optimizer or not? static void SetOptimizer(std::unique_ptr optimizer, bool local = false); static std::string GetType(); diff --git a/cpp-package/include/mxnet-cpp/lr_scheduler.h b/cpp-package/include/mxnet-cpp/lr_scheduler.h index b9381a830a88..574472d3b7c4 100644 --- a/cpp-package/include/mxnet-cpp/lr_scheduler.h +++ b/cpp-package/include/mxnet-cpp/lr_scheduler.h @@ -18,9 +18,9 @@ */ /*! -* \file lr_scheduler.h -* \brief Scheduling learning rate -*/ + * \file lr_scheduler.h + * \brief Scheduling learning rate + */ #ifndef MXNET_CPP_LR_SCHEDULER_H_ #define MXNET_CPP_LR_SCHEDULER_H_ @@ -31,28 +31,29 @@ namespace mxnet { namespace cpp { /*! -* \brief lr scheduler interface -*/ + * \brief lr scheduler interface + */ class LRScheduler { public: /*! - * \brief constructor - * \param base_lr the initial learning rate. 
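// Editor's sketch (not part of the original commit): the static KVStore
// push/pull API whose signatures were reformatted above. The element types
// of the key/value vectors were stripped in extraction; std::vector<int>
// keys and std::vector<NDArray> values are assumed.
#include <vector>
#include "mxnet-cpp/MxNetCpp.h"

void SyncGradients(const std::vector<int>& keys,
                   const std::vector<mxnet::cpp::NDArray>& grads,
                   std::vector<mxnet::cpp::NDArray>* weights) {
  mxnet::cpp::KVStore::Push(keys, grads, 0);    // aggregate gradients
  mxnet::cpp::KVStore::Pull(keys, weights, 0);  // fetch updated weights
}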
- */ - explicit LRScheduler(float base_lr = 0.01) - : base_lr_(base_lr) {} + * \brief constructor + * \param base_lr the initial learning rate. + */ + explicit LRScheduler(float base_lr = 0.01) : base_lr_(base_lr) {} /*! - * \brief set base lr - * \param lr learning rate from optimizer - */ - void SetLR(const float lr) { base_lr_ = lr; } + * \brief set base lr + * \param lr learning rate from optimizer + */ + void SetLR(const float lr) { + base_lr_ = lr; + } /*! - * \brief get a new learning rate - */ + * \brief get a new learning rate + */ virtual float GetLR(unsigned num_update) = 0; /*! - * \brief destructor - */ + * \brief destructor + */ virtual ~LRScheduler() {} protected: @@ -63,8 +64,8 @@ class FactorScheduler : public LRScheduler { public: explicit FactorScheduler(int step, float factor = 1, float stop_factor_lr = 1e-8) : LRScheduler() { - step_ = step; - factor_ = factor; + step_ = step; + factor_ = factor; stop_factor_lr_ = stop_factor_lr; } @@ -74,8 +75,8 @@ class FactorScheduler : public LRScheduler { base_lr_ *= factor_; if (base_lr_ < stop_factor_lr_) { base_lr_ = stop_factor_lr_; - LG << "Update[" << num_update << "]: now learning rate arrived at " \ - << base_lr_ << ", will not change in the future"; + LG << "Update[" << num_update << "]: now learning rate arrived at " << base_lr_ + << ", will not change in the future"; } else { LG << "Update[" << num_update << "]: Change learning rate to " << base_lr_; } diff --git a/cpp-package/include/mxnet-cpp/metric.h b/cpp-package/include/mxnet-cpp/metric.h index 6dbb197dae49..7e3f39e65b96 100644 --- a/cpp-package/include/mxnet-cpp/metric.h +++ b/cpp-package/include/mxnet-cpp/metric.h @@ -18,10 +18,10 @@ */ /*! -* \file base.h -* \brief metrics defined -* \author Zhang Chen -*/ + * \file base.h + * \brief metrics defined + * \author Zhang Chen + */ #ifndef MXNET_CPP_METRIC_H_ #define MXNET_CPP_METRIC_H_ @@ -38,24 +38,24 @@ namespace cpp { class EvalMetric { public: - explicit EvalMetric(const std::string& name, int num = 0) - : name(name), num(num) {} + explicit EvalMetric(const std::string& name, int num = 0) : name(name), num(num) {} virtual void Update(NDArray labels, NDArray preds) = 0; void Reset() { - num_inst = 0; + num_inst = 0; sum_metric = 0.0f; } - float Get() { return sum_metric / num_inst; } + float Get() { + return sum_metric / num_inst; + } void GetNameValue(); protected: std::string name; int num; float sum_metric = 0.0f; - int num_inst = 0; + int num_inst = 0; - static void CheckLabelShapes(NDArray labels, NDArray preds, - bool strict = false) { + static void CheckLabelShapes(NDArray labels, NDArray preds, bool strict = false) { if (strict) { CHECK_EQ(Shape(labels.GetShape()), Shape(preds.GetShape())); } else { @@ -88,15 +88,14 @@ class LogLoss : public EvalMetric { void Update(NDArray labels, NDArray preds) override { static const float epsilon = 1e-15; - mx_uint len = labels.GetShape()[0]; - mx_uint m = preds.GetShape()[1]; + mx_uint len = labels.GetShape()[0]; + mx_uint m = preds.GetShape()[1]; std::vector pred_data(len * m); std::vector label_data(len); preds.SyncCopyToCPU(&pred_data, pred_data.size()); labels.SyncCopyToCPU(&label_data, len); for (mx_uint i = 0; i < len; ++i) { - sum_metric += - -std::log(std::max(pred_data[i * m + label_data[i]], epsilon)); + sum_metric += -std::log(std::max(pred_data[i * m + label_data[i]], epsilon)); num_inst += 1; } } @@ -114,7 +113,7 @@ class MAE : public EvalMetric { std::vector label_data; labels.SyncCopyToCPU(&label_data); - size_t len = preds.Size(); + size_t len = 
preds.Size(); mx_float sum = 0; for (size_t i = 0; i < len; ++i) { sum += std::abs(pred_data[i] - label_data[i]); @@ -136,7 +135,7 @@ class MSE : public EvalMetric { std::vector label_data; labels.SyncCopyToCPU(&label_data); - size_t len = preds.Size(); + size_t len = preds.Size(); mx_float sum = 0; for (size_t i = 0; i < len; ++i) { mx_float diff = pred_data[i] - label_data[i]; @@ -159,7 +158,7 @@ class RMSE : public EvalMetric { std::vector label_data; labels.SyncCopyToCPU(&label_data); - size_t len = preds.Size(); + size_t len = preds.Size(); mx_float sum = 0; for (size_t i = 0; i < len; ++i) { mx_float diff = pred_data[i] - label_data[i]; @@ -172,8 +171,7 @@ class RMSE : public EvalMetric { class PSNR : public EvalMetric { public: - PSNR() : EvalMetric("psnr") { - } + PSNR() : EvalMetric("psnr") {} void Update(NDArray labels, NDArray preds) override { CheckLabelShapes(labels, preds); @@ -183,7 +181,7 @@ class PSNR : public EvalMetric { std::vector label_data; labels.SyncCopyToCPU(&label_data); - size_t len = preds.Size(); + size_t len = preds.Size(); mx_float sum = 0; for (size_t i = 0; i < len; ++i) { mx_float diff = pred_data[i] - label_data[i]; @@ -206,4 +204,3 @@ class PSNR : public EvalMetric { } // namespace mxnet #endif // MXNET_CPP_METRIC_H_ - diff --git a/cpp-package/include/mxnet-cpp/model.h b/cpp-package/include/mxnet-cpp/model.h index c8af6a476a52..8ca718d0ed83 100644 --- a/cpp-package/include/mxnet-cpp/model.h +++ b/cpp-package/include/mxnet-cpp/model.h @@ -18,10 +18,10 @@ */ /*! -* \file model.h -* \brief MXNET.cpp model module -* \author Zhang Chen -*/ + * \file model.h + * \brief MXNET.cpp model module + * \author Zhang Chen + */ #ifndef MXNET_CPP_MODEL_H_ #define MXNET_CPP_MODEL_H_ @@ -38,9 +38,9 @@ namespace cpp { struct FeedForwardConfig { Symbol symbol; std::vector ctx = {Context::cpu()}; - int num_epoch = 0; - int epoch_size = 0; - std::string optimizer = "sgd"; + int num_epoch = 0; + int epoch_size = 0; + std::string optimizer = "sgd"; // TODO(zhangchen-qinyinghua) More implement // initializer=Uniform(0.01), // numpy_batch_size=128, @@ -48,12 +48,12 @@ struct FeedForwardConfig { // allow_extra_params=False, // begin_epoch=0, // **kwargs): - FeedForwardConfig(const FeedForwardConfig &other) {} + FeedForwardConfig(const FeedForwardConfig& other) {} FeedForwardConfig() {} }; class FeedForward { public: - explicit FeedForward(const FeedForwardConfig &conf) : conf_(conf) {} + explicit FeedForward(const FeedForwardConfig& conf) : conf_(conf) {} void Predict(); void Score(); void Fit(); @@ -73,4 +73,3 @@ class FeedForward { } // namespace mxnet #endif // MXNET_CPP_MODEL_H_ - diff --git a/cpp-package/include/mxnet-cpp/ndarray.h b/cpp-package/include/mxnet-cpp/ndarray.h index 793f0e87d9dd..60c30957a4cc 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.h +++ b/cpp-package/include/mxnet-cpp/ndarray.h @@ -18,10 +18,10 @@ */ /*! -* \file ndarray.h -* \brief definition of ndarray -* \author Chuntao Hong, Zhang Chen -*/ + * \file ndarray.h + * \brief definition of ndarray + * \author Chuntao Hong, Zhang Chen + */ #ifndef MXNET_CPP_NDARRAY_H_ #define MXNET_CPP_NDARRAY_H_ @@ -37,31 +37,31 @@ namespace mxnet { namespace cpp { -enum DeviceType { - kCPU = 1, - kGPU = 2, - kCPUPinned = 3 -}; +enum DeviceType { kCPU = 1, kGPU = 2, kCPUPinned = 3 }; /*! -* \brief Context interface -*/ + * \brief Context interface + */ class Context { public: /*! 
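// Editor's sketch (not part of the original commit): the EvalMetric
// workflow shown above -- Update accumulates, Get averages, Reset clears.
// MSE is assumed to be default-constructible like its siblings, and
// labels/preds are assumed to be same-shape CPU NDArrays.
void Evaluate(mxnet::cpp::MSE* metric,
              const std::vector<mxnet::cpp::NDArray>& labels,
              const std::vector<mxnet::cpp::NDArray>& preds) {
  metric->Reset();
  for (size_t i = 0; i < labels.size(); ++i) {
    metric->Update(labels[i], preds[i]);  // adds per-element squared error
  }
  LG << "mse = " << metric->Get();        // sum_metric / num_inst
}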
- * \brief Context constructor - * \param type type of the device - * \param id id of the device - */ - Context(const DeviceType &type, int id) : type_(type), id_(id) {} + * \brief Context constructor + * \param type type of the device + * \param id id of the device + */ + Context(const DeviceType& type, int id) : type_(type), id_(id) {} /*! - * \return the type of the device - */ - DeviceType GetDeviceType() const { return type_; } + * \return the type of the device + */ + DeviceType GetDeviceType() const { + return type_; + } /*! - * \return the id of the device - */ - int GetDeviceId() const { return id_; } + * \return the id of the device + */ + int GetDeviceId() const { + return id_; + } /*! * \brief Return a GPU context @@ -87,229 +87,231 @@ class Context { }; /*! -* \brief struct to store NDArrayHandle -*/ + * \brief struct to store NDArrayHandle + */ struct NDBlob { public: /*! - * \brief default constructor - */ + * \brief default constructor + */ NDBlob() : handle_(nullptr) {} /*! - * \brief construct with a NDArrayHandle - * \param handle NDArrayHandle to store - */ + * \brief construct with a NDArrayHandle + * \param handle NDArrayHandle to store + */ explicit NDBlob(NDArrayHandle handle) : handle_(handle) {} /*! - * \brief destructor, free the NDArrayHandle - */ - ~NDBlob() { MXNDArrayFree(handle_); } + * \brief destructor, free the NDArrayHandle + */ + ~NDBlob() { + MXNDArrayFree(handle_); + } /*! - * \brief the NDArrayHandle - */ + * \brief the NDArrayHandle + */ NDArrayHandle handle_; private: - NDBlob(const NDBlob &); - NDBlob &operator=(const NDBlob &); + NDBlob(const NDBlob&); + NDBlob& operator=(const NDBlob&); }; /*! -* \brief NDArray interface -*/ + * \brief NDArray interface + */ class NDArray { public: /*! - * \brief construct with a none handle - */ + * \brief construct with a none handle + */ NDArray(); /*! - * \brief construct with a NDArrayHandle - */ - explicit NDArray(const NDArrayHandle &handle); - /*! - * \brief construct a new dynamic NDArray - * \param shape the shape of array - * \param context context of NDArray - * \param delay_alloc whether delay the allocation - * \param dtype data type of NDArray - */ - NDArray(const std::vector &shape, const Context &context, - bool delay_alloc = true, int dtype = 0); - /*! - * \brief construct a new dynamic NDArray - * \param shape the shape of array - * \param constext context of NDArray - * \param delay_alloc whether delay the allocation - * \param dtype data type of NDArray - */ - NDArray(const Shape &shape, const Context &context, - bool delay_alloc = true, int dtype = 0); - NDArray(const mx_float *data, size_t size); - /*! - * \brief construct a new dynamic NDArray - * \param data the data to create NDArray from - * \param shape the shape of array - * \param constext context of NDArray - */ - NDArray(const mx_float *data, const Shape &shape, const Context &context); - /*! - * \brief construct a new dynamic NDArray - * \param data the data to create NDArray from - * \param shape the shape of array - * \param constext context of NDArray - */ - NDArray(const std::vector &data, const Shape &shape, - const Context &context); - explicit NDArray(const std::vector &data); + * \brief construct with a NDArrayHandle + */ + explicit NDArray(const NDArrayHandle& handle); + /*! 
+ * \brief construct a new dynamic NDArray + * \param shape the shape of array + * \param context context of NDArray + * \param delay_alloc whether delay the allocation + * \param dtype data type of NDArray + */ + NDArray(const std::vector& shape, + const Context& context, + bool delay_alloc = true, + int dtype = 0); + /*! + * \brief construct a new dynamic NDArray + * \param shape the shape of array + * \param constext context of NDArray + * \param delay_alloc whether delay the allocation + * \param dtype data type of NDArray + */ + NDArray(const Shape& shape, const Context& context, bool delay_alloc = true, int dtype = 0); + NDArray(const mx_float* data, size_t size); + /*! + * \brief construct a new dynamic NDArray + * \param data the data to create NDArray from + * \param shape the shape of array + * \param constext context of NDArray + */ + NDArray(const mx_float* data, const Shape& shape, const Context& context); + /*! + * \brief construct a new dynamic NDArray + * \param data the data to create NDArray from + * \param shape the shape of array + * \param constext context of NDArray + */ + NDArray(const std::vector& data, const Shape& shape, const Context& context); + explicit NDArray(const std::vector& data); NDArray operator+(mx_float scalar); NDArray operator-(mx_float scalar); NDArray operator*(mx_float scalar); NDArray operator/(mx_float scalar); NDArray operator%(mx_float scalar); - NDArray operator+(const NDArray &); - NDArray operator-(const NDArray &); - NDArray operator*(const NDArray &); - NDArray operator/(const NDArray &); - NDArray operator%(const NDArray &); - /*! - * \brief set all the elements in ndarray to be scalar - * \param scalar the scalar to set - * \return reference of self - */ - NDArray &operator=(mx_float scalar); - /*! - * \brief elementwise add to current space - * this mutate the current NDArray - * \param scalar the data to add - * \return reference of self - */ - NDArray &operator+=(mx_float scalar); - /*! - * \brief elementwise subtract from current ndarray - * this mutate the current NDArray - * \param scalar the data to subtract - * \return reference of self - */ - NDArray &operator-=(mx_float scalar); - /*! - * \brief elementwise multiplication to current ndarray - * this mutate the current NDArray - * \param scalar the data to subtract - * \return reference of self - */ - NDArray &operator*=(mx_float scalar); - /*! - * \brief elementwise division from current ndarray - * this mutate the current NDArray - * \param scalar the data to subtract - * \return reference of self - */ - NDArray &operator/=(mx_float scalar); - /*! - * \brief elementwise modulo from current ndarray - * this mutate the current NDArray - * \param scalar the data to subtract - * \return reference of self - */ - NDArray &operator%=(mx_float scalar); - /*! - * \brief elementwise add to current space - * this mutate the current NDArray - * \param src the data to add - * \return reference of self - */ - NDArray &operator+=(const NDArray &src); - /*! - * \brief elementwise subtract from current ndarray - * this mutate the current NDArray - * \param src the data to subtract - * \return reference of self - */ - NDArray &operator-=(const NDArray &src); - /*! - * \brief elementwise multiplication to current ndarray - * this mutate the current NDArray - * \param src the data to subtract - * \return reference of self - */ - NDArray &operator*=(const NDArray &src); - /*! 
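// Editor's sketch (not part of the original commit): the value semantics
// documented above -- binary operators return new arrays, the compound
// forms mutate in place, and reads must be synchronized with the engine.
void ArithmeticExample() {
  using namespace mxnet::cpp;
  NDArray a(Shape(2, 3), Context::cpu(), /*delay_alloc=*/false);
  NDArray b(Shape(2, 3), Context::cpu(), /*delay_alloc=*/false);
  a = 1.0f;               // operator=(mx_float) fills every element
  b = 2.0f;
  NDArray c = a + b;      // new array; a and b are unchanged
  c *= 2.0f;              // operator*= mutates c in place
  c.WaitToRead();         // block until the async engine has written c
}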
- * \brief elementwise division from current ndarray - * this mutate the current NDArray - * \param src the data to subtract - * \return reference of self - */ - NDArray &operator/=(const NDArray &src); - /*! - * \brief elementwise modulo from current ndarray - * this mutate the current NDArray - * \param src the data to subtract - * \return reference of self - */ - NDArray &operator%=(const NDArray &src); + NDArray operator+(const NDArray&); + NDArray operator-(const NDArray&); + NDArray operator*(const NDArray&); + NDArray operator/(const NDArray&); + NDArray operator%(const NDArray&); + /*! + * \brief set all the elements in ndarray to be scalar + * \param scalar the scalar to set + * \return reference of self + */ + NDArray& operator=(mx_float scalar); + /*! + * \brief elementwise add to current space + * this mutate the current NDArray + * \param scalar the data to add + * \return reference of self + */ + NDArray& operator+=(mx_float scalar); + /*! + * \brief elementwise subtract from current ndarray + * this mutate the current NDArray + * \param scalar the data to subtract + * \return reference of self + */ + NDArray& operator-=(mx_float scalar); + /*! + * \brief elementwise multiplication to current ndarray + * this mutate the current NDArray + * \param scalar the data to subtract + * \return reference of self + */ + NDArray& operator*=(mx_float scalar); + /*! + * \brief elementwise division from current ndarray + * this mutate the current NDArray + * \param scalar the data to subtract + * \return reference of self + */ + NDArray& operator/=(mx_float scalar); + /*! + * \brief elementwise modulo from current ndarray + * this mutate the current NDArray + * \param scalar the data to subtract + * \return reference of self + */ + NDArray& operator%=(mx_float scalar); + /*! + * \brief elementwise add to current space + * this mutate the current NDArray + * \param src the data to add + * \return reference of self + */ + NDArray& operator+=(const NDArray& src); + /*! + * \brief elementwise subtract from current ndarray + * this mutate the current NDArray + * \param src the data to subtract + * \return reference of self + */ + NDArray& operator-=(const NDArray& src); + /*! + * \brief elementwise multiplication to current ndarray + * this mutate the current NDArray + * \param src the data to subtract + * \return reference of self + */ + NDArray& operator*=(const NDArray& src); + /*! + * \brief elementwise division from current ndarray + * this mutate the current NDArray + * \param src the data to subtract + * \return reference of self + */ + NDArray& operator/=(const NDArray& src); + /*! + * \brief elementwise modulo from current ndarray + * this mutate the current NDArray + * \param src the data to subtract + * \return reference of self + */ + NDArray& operator%=(const NDArray& src); NDArray ArgmaxChannel(); /*! - * \brief Do a synchronize copy from a contiguous CPU memory region. - * - * This function will call WaitToWrite before the copy is performed. - * This is useful to copy data from existing memory region that are - * not wrapped by NDArray(thus dependency not being tracked). - * - * \param data the data source to copy from. - * \param size the memory size we want to copy from. - */ - void SyncCopyFromCPU(const mx_float *data, size_t size); - /*! - * \brief Do a synchronize copy from a contiguous CPU memory region. - * - * This function will call WaitToWrite before the copy is performed. 
- * This is useful to copy data from existing memory region that are - * not wrapped by NDArray(thus dependency not being tracked). - * - * \param data the data source to copy from, int the form of mx_float vector - */ - void SyncCopyFromCPU(const std::vector &data); - /*! - * \brief Do a synchronize copy to a contiguous CPU memory region. - * - * This function will call WaitToRead before the copy is performed. - * This is useful to copy data from existing memory region that are - * not wrapped by NDArray(thus dependency not being tracked). - * - * \param data the data source to copyinto. - * \param size the memory size we want to copy into. Defualt value is Size() - */ - void SyncCopyToCPU(mx_float *data, size_t size = 0); - /*! - * \brief Do a synchronize copy to a contiguous CPU memory region. - * - * This function will call WaitToRead before the copy is performed. - * This is useful to copy data from existing memory region that are - * not wrapped by NDArray(thus dependency not being tracked). - * - * \param data the data source to copyinto. - * \param size the memory size we want to copy into. Defualt value is Size() - */ - void SyncCopyToCPU(std::vector *data, size_t size = 0); - /*! - * \brief copy the content of current array to a target array. - * \param other the target NDArray - * \return the target NDarray - */ - NDArray CopyTo(NDArray * other) const; - /*! - * \brief return a new copy to this NDArray - * \param Context the new context of this NDArray - * \return the new copy - */ - NDArray Copy(const Context &) const; - /*! - * \brief return offset of the element at (h, w) - * \param h height position - * \param w width position - * \return offset of two dimensions array - */ + * \brief Do a synchronize copy from a contiguous CPU memory region. + * + * This function will call WaitToWrite before the copy is performed. + * This is useful to copy data from existing memory region that are + * not wrapped by NDArray(thus dependency not being tracked). + * + * \param data the data source to copy from. + * \param size the memory size we want to copy from. + */ + void SyncCopyFromCPU(const mx_float* data, size_t size); + /*! + * \brief Do a synchronize copy from a contiguous CPU memory region. + * + * This function will call WaitToWrite before the copy is performed. + * This is useful to copy data from existing memory region that are + * not wrapped by NDArray(thus dependency not being tracked). + * + * \param data the data source to copy from, int the form of mx_float vector + */ + void SyncCopyFromCPU(const std::vector& data); + /*! + * \brief Do a synchronize copy to a contiguous CPU memory region. + * + * This function will call WaitToRead before the copy is performed. + * This is useful to copy data from existing memory region that are + * not wrapped by NDArray(thus dependency not being tracked). + * + * \param data the data source to copyinto. + * \param size the memory size we want to copy into. Defualt value is Size() + */ + void SyncCopyToCPU(mx_float* data, size_t size = 0); + /*! + * \brief Do a synchronize copy to a contiguous CPU memory region. + * + * This function will call WaitToRead before the copy is performed. + * This is useful to copy data from existing memory region that are + * not wrapped by NDArray(thus dependency not being tracked). + * + * \param data the data source to copyinto. + * \param size the memory size we want to copy into. Defualt value is Size() + */ + void SyncCopyToCPU(std::vector* data, size_t size = 0); + /*! 
+ * \brief copy the content of current array to a target array. + * \param other the target NDArray + * \return the target NDarray + */ + NDArray CopyTo(NDArray* other) const; + /*! + * \brief return a new copy to this NDArray + * \param Context the new context of this NDArray + * \return the new copy + */ + NDArray Copy(const Context&) const; + /*! + * \brief return offset of the element at (h, w) + * \param h height position + * \param w width position + * \return offset of two dimensions array + */ size_t Offset(size_t h = 0, size_t w = 0) const; /*! * \brief return offset of three dimensions array @@ -320,17 +322,17 @@ class NDArray { */ size_t Offset(size_t c, size_t h, size_t w) const; /*! - * \brief return value of the element at (index) - * \param index position - * \return value of one dimensions array - */ + * \brief return value of the element at (index) + * \param index position + * \return value of one dimensions array + */ mx_float At(size_t index) const; /*! - * \brief return value of the element at (h, w) - * \param h height position - * \param w width position - * \return value of two dimensions array - */ + * \brief return value of the element at (h, w) + * \param h height position + * \param w width position + * \return value of two dimensions array + */ mx_float At(size_t h, size_t w) const; /*! * \brief return value of three dimensions array @@ -341,143 +343,144 @@ class NDArray { */ mx_float At(size_t c, size_t h, size_t w) const; /*! - * \brief Slice a NDArray - * \param begin begin index in first dim - * \param end end index in first dim - * \return sliced NDArray - */ + * \brief Slice a NDArray + * \param begin begin index in first dim + * \param end end index in first dim + * \return sliced NDArray + */ NDArray Slice(mx_uint begin, mx_uint end) const; /*! - * \brief Return a reshaped NDArray that shares memory with current one - * \param new_shape the new shape - * \return reshaped NDarray - */ - NDArray Reshape(const Shape &new_shape) const; + * \brief Return a reshaped NDArray that shares memory with current one + * \param new_shape the new shape + * \return reshaped NDarray + */ + NDArray Reshape(const Shape& new_shape) const; /*! - * \brief Block until all the pending write operations with respect - * to current NDArray are finished, and read can be performed. - */ + * \brief Block until all the pending write operations with respect + * to current NDArray are finished, and read can be performed. + */ void WaitToRead() const; /*! - * \brief Block until all the pending read/write operations with respect - * to current NDArray are finished, and write can be performed. - */ + * \brief Block until all the pending read/write operations with respect + * to current NDArray are finished, and write can be performed. + */ void WaitToWrite(); /*! - * \brief Block until all the pending read/write operations with respect - * to current NDArray are finished, and read/write can be performed. - */ + * \brief Block until all the pending read/write operations with respect + * to current NDArray are finished, and read/write can be performed. + */ static void WaitAll(); /*! - * \brief Sample gaussian distribution for each elements of out. - * \param mu mean of gaussian distribution. - * \param sigma standard deviation of gaussian distribution. - * \param out output NDArray. - */ - static void SampleGaussian(mx_float mu, mx_float sigma, NDArray *out); - /*! - * \brief Sample uniform distribution for each elements of out. - * \param begin lower bound of distribution. 
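// Editor's sketch (not part of the original commit): a round trip through
// the synchronous copy helpers documented above. SyncCopyFromCPU waits for
// pending writes and SyncCopyToCPU waits for pending reads, so no explicit
// WaitAll() is needed around them.
void RoundTripExample() {
  using namespace mxnet::cpp;
  std::vector<mx_float> src = {1.f, 2.f, 3.f, 4.f};
  NDArray arr(Shape(2, 2), Context::cpu(), /*delay_alloc=*/false);
  arr.SyncCopyFromCPU(src.data(), src.size());
  std::vector<mx_float> dst;
  arr.SyncCopyToCPU(&dst);  // size 0 defaults to arr.Size()
}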
- * \param end upper bound of distribution. - * \param out output NDArray. - */ - static void SampleUniform(mx_float begin, mx_float end, NDArray *out); - /*! - * \brief Load NDArrays from binary file. - * \param file_name name of the binary file. - * \param array_list a list of NDArrays returned, do not fill the list if - * nullptr is given. - * \param array_map a map from names to NDArrays returned, do not fill the map - * if nullptr is given or no names is stored in binary file. - */ - static void Load(const std::string &file_name, - std::vector *array_list = nullptr, - std::map *array_map = nullptr); - /*! - * \brief Load map of NDArrays from binary file. - * \param file_name name of the binary file. - * \return a list of NDArrays. - */ - static std::map LoadToMap(const std::string &file_name); - /*! - * \brief Load list of NDArrays from binary file. - * \param file_name name of the binary file. - * \return a map from names to NDArrays. - */ - static std::vector LoadToList(const std::string &file_name); - /*! - * \brief Load NDArrays from buffer. - * \param buffer Pointer to buffer. (ie contents of param file) - * \param size Size of buffer - * \param array_list a list of NDArrays returned, do not fill the list if - * nullptr is given. - * \param array_map a map from names to NDArrays returned, do not fill the map - * if nullptr is given or no names is stored in binary file. - */ - static void LoadFromBuffer(const void *buffer, size_t size, - std::vector *array_list = nullptr, - std::map *array_map = nullptr); - /*! - * \brief Load map of NDArrays from buffer. - * \param buffer Pointer to buffer. (ie contents of param file) - * \param size Size of buffer - * \return a list of NDArrays. - */ - static std::map LoadFromBufferToMap(const void *buffer, size_t size); - /*! - * \brief Load list of NDArrays from buffer. - * \param buffer Pointer to buffer. (ie contents of param file) - * \param size Size of buffer - * \return a map from names to NDArrays. - */ - static std::vector LoadFromBufferToList(const void *buffer, size_t size); - /*! - * \brief save a map of string->NDArray to binary file. - * \param file_name name of the binary file. - * \param array_map a map from names to NDArrays. - */ - static void Save(const std::string &file_name, - const std::map &array_map); - /*! - * \brief save a list of NDArrays to binary file. - * \param file_name name of the binary file. - * \param array_list a list of NDArrays. - */ - static void Save(const std::string &file_name, - const std::vector &array_list); - /*! - * \return the size of current NDArray, a.k.a. the production of all shape dims - */ + * \brief Sample gaussian distribution for each elements of out. + * \param mu mean of gaussian distribution. + * \param sigma standard deviation of gaussian distribution. + * \param out output NDArray. + */ + static void SampleGaussian(mx_float mu, mx_float sigma, NDArray* out); + /*! + * \brief Sample uniform distribution for each elements of out. + * \param begin lower bound of distribution. + * \param end upper bound of distribution. + * \param out output NDArray. + */ + static void SampleUniform(mx_float begin, mx_float end, NDArray* out); + /*! + * \brief Load NDArrays from binary file. + * \param file_name name of the binary file. + * \param array_list a list of NDArrays returned, do not fill the list if + * nullptr is given. + * \param array_map a map from names to NDArrays returned, do not fill the map + * if nullptr is given or no names is stored in binary file. 
+ */ + static void Load(const std::string& file_name, + std::vector* array_list = nullptr, + std::map* array_map = nullptr); + /*! + * \brief Load map of NDArrays from binary file. + * \param file_name name of the binary file. + * \return a list of NDArrays. + */ + static std::map LoadToMap(const std::string& file_name); + /*! + * \brief Load list of NDArrays from binary file. + * \param file_name name of the binary file. + * \return a map from names to NDArrays. + */ + static std::vector LoadToList(const std::string& file_name); + /*! + * \brief Load NDArrays from buffer. + * \param buffer Pointer to buffer. (ie contents of param file) + * \param size Size of buffer + * \param array_list a list of NDArrays returned, do not fill the list if + * nullptr is given. + * \param array_map a map from names to NDArrays returned, do not fill the map + * if nullptr is given or no names is stored in binary file. + */ + static void LoadFromBuffer(const void* buffer, + size_t size, + std::vector* array_list = nullptr, + std::map* array_map = nullptr); + /*! + * \brief Load map of NDArrays from buffer. + * \param buffer Pointer to buffer. (ie contents of param file) + * \param size Size of buffer + * \return a list of NDArrays. + */ + static std::map LoadFromBufferToMap(const void* buffer, size_t size); + /*! + * \brief Load list of NDArrays from buffer. + * \param buffer Pointer to buffer. (ie contents of param file) + * \param size Size of buffer + * \return a map from names to NDArrays. + */ + static std::vector LoadFromBufferToList(const void* buffer, size_t size); + /*! + * \brief save a map of string->NDArray to binary file. + * \param file_name name of the binary file. + * \param array_map a map from names to NDArrays. + */ + static void Save(const std::string& file_name, const std::map& array_map); + /*! + * \brief save a list of NDArrays to binary file. + * \param file_name name of the binary file. + * \param array_list a list of NDArrays. + */ + static void Save(const std::string& file_name, const std::vector& array_list); + /*! + * \return the size of current NDArray, a.k.a. the production of all shape dims + */ size_t Size() const; /*! - * \return the shape of current NDArray, in the form of mx_uint vector - */ + * \return the shape of current NDArray, in the form of mx_uint vector + */ std::vector GetShape() const; /*! - * \return the data type of current NDArray - */ + * \return the data type of current NDArray + */ int GetDType() const; /*! - * \brief Get the pointer to data (IMPORTANT: The ndarray should not be in GPU) - * \return the data pointer to the current NDArray - */ - const mx_float *GetData() const; + * \brief Get the pointer to data (IMPORTANT: The ndarray should not be in GPU) + * \return the data pointer to the current NDArray + */ + const mx_float* GetData() const; /*! - * \return the context of NDArray - */ + * \return the context of NDArray + */ Context GetContext() const; /*! 
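// Editor's sketch (not part of the original commit): a save/load round
// trip over the static helpers listed above. "params.bin" is a placeholder
// file name, and the map value type (stripped in extraction) is assumed to
// be NDArray.
void CheckpointExample(const std::map<std::string, mxnet::cpp::NDArray>& params) {
  mxnet::cpp::NDArray::Save("params.bin", params);
  std::map<std::string, mxnet::cpp::NDArray> loaded =
      mxnet::cpp::NDArray::LoadToMap("params.bin");
}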
- * \return the NDArrayHandle of the current NDArray - */ - NDArrayHandle GetHandle() const { return blob_ptr_->handle_; } + * \return the NDArrayHandle of the current NDArray + */ + NDArrayHandle GetHandle() const { + return blob_ptr_->handle_; + } private: std::shared_ptr blob_ptr_; }; -std::ostream& operator<<(std::ostream& out, const NDArray &ndarray); +std::ostream& operator<<(std::ostream& out, const NDArray& ndarray); } // namespace cpp } // namespace mxnet diff --git a/cpp-package/include/mxnet-cpp/op_map.h b/cpp-package/include/mxnet-cpp/op_map.h index b54cc0ae2c01..fd6944733470 100644 --- a/cpp-package/include/mxnet-cpp/op_map.h +++ b/cpp-package/include/mxnet-cpp/op_map.h @@ -18,10 +18,10 @@ */ /*! -* \file op_map.h -* \brief definition of OpMap -* \author Chuntao Hong -*/ + * \file op_map.h + * \brief definition of OpMap + * \author Chuntao Hong + */ #ifndef MXNET_CPP_OP_MAP_H_ #define MXNET_CPP_OP_MAP_H_ @@ -35,38 +35,42 @@ namespace mxnet { namespace cpp { /*! -* \brief OpMap instance holds a map of all the symbol creators so we can -* get symbol creators by name. -* This is used internally by Symbol and Operator. -*/ + * \brief OpMap instance holds a map of all the symbol creators so we can + * get symbol creators by name. + * This is used internally by Symbol and Operator. + */ class OpMap { public: /*! - * \brief Create an Mxnet instance - */ + * \brief Create an Mxnet instance + */ inline OpMap() { - mx_uint num_symbol_creators = 0; - AtomicSymbolCreator *symbol_creators = nullptr; - int r = - MXSymbolListAtomicSymbolCreators(&num_symbol_creators, &symbol_creators); + mx_uint num_symbol_creators = 0; + AtomicSymbolCreator* symbol_creators = nullptr; + int r = MXSymbolListAtomicSymbolCreators(&num_symbol_creators, &symbol_creators); CHECK_EQ(r, 0); for (mx_uint i = 0; i < num_symbol_creators; i++) { - const char *name; - const char *description; + const char* name; + const char* description; mx_uint num_args; - const char **arg_names; - const char **arg_type_infos; - const char **arg_descriptions; - const char *key_var_num_args; - r = MXSymbolGetAtomicSymbolInfo(symbol_creators[i], &name, &description, - &num_args, &arg_names, &arg_type_infos, - &arg_descriptions, &key_var_num_args); + const char** arg_names; + const char** arg_type_infos; + const char** arg_descriptions; + const char* key_var_num_args; + r = MXSymbolGetAtomicSymbolInfo(symbol_creators[i], + &name, + &description, + &num_args, + &arg_names, + &arg_type_infos, + &arg_descriptions, + &key_var_num_args); CHECK_EQ(r, 0); symbol_creators_[name] = symbol_creators[i]; } nn_uint num_ops; - const char **op_names; + const char** op_names; r = NNListAllOpNames(&num_ops, &op_names); CHECK_EQ(r, 0); for (nn_uint i = 0; i < num_ops; i++) { @@ -78,24 +82,24 @@ class OpMap { } /*! - * \brief Get a symbol creator with its name. - * - * \param name name of the symbol creator - * \return handle to the symbol creator - */ - inline AtomicSymbolCreator GetSymbolCreator(const std::string &name) { + * \brief Get a symbol creator with its name. + * + * \param name name of the symbol creator + * \return handle to the symbol creator + */ + inline AtomicSymbolCreator GetSymbolCreator(const std::string& name) { if (symbol_creators_.count(name) == 0) return GetOpHandle(name); return symbol_creators_[name]; } /*! - * \brief Get an op handle with its name. - * - * \param name name of the op - * \return handle to the op - */ - inline OpHandle GetOpHandle(const std::string &name) { + * \brief Get an op handle with its name. 
+ * + * \param name name of the op + * \return handle to the op + */ + inline OpHandle GetOpHandle(const std::string& name) { return op_handles_[name]; } diff --git a/cpp-package/include/mxnet-cpp/op_suppl.h b/cpp-package/include/mxnet-cpp/op_suppl.h index 52cdae772a68..d72b83c11671 100644 --- a/cpp-package/include/mxnet-cpp/op_suppl.h +++ b/cpp-package/include/mxnet-cpp/op_suppl.h @@ -18,10 +18,10 @@ */ /*! -* \file op_suppl.h -* \brief A supplement and amendment of the operators from op.h -* \author Zhang Chen, zhubuntu, Xin Li -*/ + * \file op_suppl.h + * \brief A supplement and amendment of the operators from op.h + * \author Zhang Chen, zhubuntu, Xin Li + */ #ifndef MXNET_CPP_OP_SUPPL_H_ #define MXNET_CPP_OP_SUPPL_H_ @@ -38,118 +38,85 @@ namespace mxnet { namespace cpp { inline Symbol _Plus(Symbol lhs, Symbol rhs) { - return Operator("_Plus")(lhs, rhs) - .CreateSymbol(); + return Operator("_Plus")(lhs, rhs).CreateSymbol(); } inline Symbol _Mul(Symbol lhs, Symbol rhs) { - return Operator("_Mul")(lhs, rhs) - .CreateSymbol(); + return Operator("_Mul")(lhs, rhs).CreateSymbol(); } inline Symbol _Minus(Symbol lhs, Symbol rhs) { - return Operator("_Minus")(lhs, rhs) - .CreateSymbol(); + return Operator("_Minus")(lhs, rhs).CreateSymbol(); } inline Symbol _Div(Symbol lhs, Symbol rhs) { - return Operator("_Div")(lhs, rhs) - .CreateSymbol(); + return Operator("_Div")(lhs, rhs).CreateSymbol(); } inline Symbol _Mod(Symbol lhs, Symbol rhs) { - return Operator("_Mod")(lhs, rhs) - .CreateSymbol(); + return Operator("_Mod")(lhs, rhs).CreateSymbol(); } inline Symbol _Power(Symbol lhs, Symbol rhs) { - return Operator("_Power")(lhs, rhs) - .CreateSymbol(); + return Operator("_Power")(lhs, rhs).CreateSymbol(); } inline Symbol _Maximum(Symbol lhs, Symbol rhs) { - return Operator("_Maximum")(lhs, rhs) - .CreateSymbol(); + return Operator("_Maximum")(lhs, rhs).CreateSymbol(); } inline Symbol _Minimum(Symbol lhs, Symbol rhs) { - return Operator("_Minimum")(lhs, rhs) - .CreateSymbol(); + return Operator("_Minimum")(lhs, rhs).CreateSymbol(); } inline Symbol _PlusScalar(Symbol lhs, mx_float scalar) { - return Operator("_PlusScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_PlusScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _MinusScalar(Symbol lhs, mx_float scalar) { - return Operator("_MinusScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_MinusScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _RMinusScalar(mx_float scalar, Symbol rhs) { - return Operator("_RMinusScalar")(rhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_RMinusScalar")(rhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _MulScalar(Symbol lhs, mx_float scalar) { - return Operator("_MulScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_MulScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _DivScalar(Symbol lhs, mx_float scalar) { - return Operator("_DivScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_DivScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _RDivScalar(mx_float scalar, Symbol rhs) { - return Operator("_RDivScalar")(rhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_RDivScalar")(rhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _ModScalar(Symbol lhs, mx_float scalar) { - return Operator("_ModScalar")(lhs) - .SetParam("scalar", scalar) - 
.CreateSymbol(); + return Operator("_ModScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _RModScalar(mx_float scalar, Symbol rhs) { - return Operator("_RModScalar")(rhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_RModScalar")(rhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _PowerScalar(Symbol lhs, mx_float scalar) { - return Operator("_PowerScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_PowerScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _RPowerScalar(mx_float scalar, Symbol rhs) { - return Operator("_RPowerScalar")(rhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_RPowerScalar")(rhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _MaximumScalar(Symbol lhs, mx_float scalar) { - return Operator("_MaximumScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_MaximumScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } inline Symbol _MinimumScalar(Symbol lhs, mx_float scalar) { - return Operator("_MinimumScalar")(lhs) - .SetParam("scalar", scalar) - .CreateSymbol(); + return Operator("_MinimumScalar")(lhs).SetParam("scalar", scalar).CreateSymbol(); } // TODO(zhangcheng-qinyinghua) // make crop function run in op.h // This function is due to [zhubuntu](https://github.com/zhubuntu) inline Symbol Crop(const std::string& symbol_name, - int num_args, - Symbol data, - Symbol crop_like, - Shape offset = Shape(0, 0), - Shape h_w = Shape(0, 0), - bool center_crop = false) { + int num_args, + Symbol data, + Symbol crop_like, + Shape offset = Shape(0, 0), + Shape h_w = Shape(0, 0), + bool center_crop = false) { return Operator("Crop") - .SetParam("num_args", num_args) - .SetParam("offset", offset) - .SetParam("h_w", h_w) - .SetParam("center_crop", center_crop) - .SetInput("arg0", data) - .SetInput("arg1", crop_like) - .CreateSymbol(symbol_name); + .SetParam("num_args", num_args) + .SetParam("offset", offset) + .SetParam("h_w", h_w) + .SetParam("center_crop", center_crop) + .SetInput("arg0", data) + .SetInput("arg1", crop_like) + .CreateSymbol(symbol_name); } - /*! * \brief Apply activation function to input. * Softmax Activation is only available with CUDNN on GPUand will be @@ -159,21 +126,16 @@ inline Symbol Crop(const std::string& symbol_name, * \param act_type Activation function to be applied. * \return new symbol */ -inline Symbol Activation(const std::string& symbol_name, - Symbol data, - const std::string& act_type) { - assert(act_type == "relu" || - act_type == "sigmoid" || - act_type == "softrelu" || +inline Symbol Activation(const std::string& symbol_name, Symbol data, const std::string& act_type) { + assert(act_type == "relu" || act_type == "sigmoid" || act_type == "softrelu" || act_type == "tanh"); return Operator("Activation") - .SetParam("act_type", act_type.c_str()) - .SetInput("data", data) - .CreateSymbol(symbol_name); + .SetParam("act_type", act_type.c_str()) + .SetInput("data", data) + .CreateSymbol(symbol_name); } } // namespace cpp } // namespace mxnet #endif // MXNET_CPP_OP_SUPPL_H_ - diff --git a/cpp-package/include/mxnet-cpp/op_util.h b/cpp-package/include/mxnet-cpp/op_util.h index 20e06a851814..616bbbb44886 100644 --- a/cpp-package/include/mxnet-cpp/op_util.h +++ b/cpp-package/include/mxnet-cpp/op_util.h @@ -18,10 +18,10 @@ */ /*! 
-* \file op_util.h -* \brief operator helper functions -* \author Chris Olivier -*/ + * \file op_util.h + * \brief operator helper functions + * \author Chris Olivier + */ #ifndef MXNET_CPP_OP_UTIL_H_ #define MXNET_CPP_OP_UTIL_H_ @@ -45,12 +45,12 @@ inline ::caffe::LayerParameter textToCaffeLayerParameter(const std::string& text return ::caffe::LayerParameter(np.layer(0)); } -template -inline StreamType& operator << (StreamType& os, const ::caffe::LayerParameter& op) { +template +inline StreamType& operator<<(StreamType& os, const ::caffe::LayerParameter& op) { std::string s; caffe::NetParameter np; // Avoid wasting time making a copy -- just push in out default object's pointer - np.mutable_layer()->AddAllocated(const_cast<::caffe::LayerParameter *>(&op)); + np.mutable_layer()->AddAllocated(const_cast<::caffe::LayerParameter*>(&op)); google::protobuf::TextFormat::PrintToString(np, &s); np.mutable_layer()->ReleaseLast(); os << s; diff --git a/cpp-package/include/mxnet-cpp/operator.h b/cpp-package/include/mxnet-cpp/operator.h index e8dad12d6053..64c283c3d497 100644 --- a/cpp-package/include/mxnet-cpp/operator.h +++ b/cpp-package/include/mxnet-cpp/operator.h @@ -18,10 +18,10 @@ */ /*! -* \file operator.h -* \brief definition of operator -* \author Chuntao Hong, Zhang Chen -*/ + * \file operator.h + * \brief definition of operator + * \author Chuntao Hong, Zhang Chen + */ #ifndef MXNET_CPP_OPERATOR_H_ #define MXNET_CPP_OPERATOR_H_ @@ -37,24 +37,24 @@ namespace mxnet { namespace cpp { class Mxnet; /*! -* \brief Operator interface -*/ + * \brief Operator interface + */ class Operator { public: /*! - * \brief Operator constructor - * \param operator_name type of the operator - */ - explicit Operator(const std::string &operator_name); - Operator &operator=(const Operator &rhs); - /*! - * \brief set config parameters - * \param name name of the config parameter - * \param value value of the config parameter - * \return reference of self - */ + * \brief Operator constructor + * \param operator_name type of the operator + */ + explicit Operator(const std::string& operator_name); + Operator& operator=(const Operator& rhs); + /*! + * \brief set config parameters + * \param name name of the config parameter + * \param value value of the config parameter + * \return reference of self + */ template - Operator &SetParam(const std::string &name, const T &value) { + Operator& SetParam(const std::string& name, const T& value) { std::string value_str; std::stringstream ss; ss << value; @@ -64,13 +64,13 @@ class Operator { return *this; } /*! - * \brief set config parameters from positional inputs - * \param pos the position of parameter - * \param value value of the config parameter - * \return reference of self - */ + * \brief set config parameters from positional inputs + * \param pos the position of parameter + * \param value value of the config parameter + * \return reference of self + */ template - Operator &SetParam(int pos, const T &value) { + Operator& SetParam(int pos, const T& value) { std::string value_str; std::stringstream ss; ss << value; @@ -80,117 +80,119 @@ class Operator { return *this; } /*! - * \brief add an input symbol - * \param name name of the input symbol - * \param symbol the input symbol - * \return reference of self - */ - Operator &SetInput(const std::string &name, const Symbol &symbol); - /*! 
- * \brief add an input symbol - * \param symbol the input symbol - */ - template - void PushInput(const Symbol &symbol) { + * \brief add an input symbol + * \param name name of the input symbol + * \param symbol the input symbol + * \return reference of self + */ + Operator& SetInput(const std::string& name, const Symbol& symbol); + /*! + * \brief add an input symbol + * \param symbol the input symbol + */ + template + void PushInput(const Symbol& symbol) { input_symbols_.push_back(symbol.GetHandle()); } /*! - * \brief add input symbols - * \return reference of self - */ - Operator &operator()() { return *this; } + * \brief add input symbols + * \return reference of self + */ + Operator& operator()() { + return *this; + } /*! - * \brief add input symbols - * \param symbol the input symbol - * \return reference of self - */ - Operator &operator()(const Symbol &symbol) { + * \brief add input symbols + * \param symbol the input symbol + * \return reference of self + */ + Operator& operator()(const Symbol& symbol) { input_symbols_.push_back(symbol.GetHandle()); return *this; } /*! - * \brief add a list of input symbols - * \param symbols the vector of the input symbols - * \return reference of self - */ - Operator &operator()(const std::vector &symbols) { - for (auto &s : symbols) { + * \brief add a list of input symbols + * \param symbols the vector of the input symbols + * \return reference of self + */ + Operator& operator()(const std::vector& symbols) { + for (auto& s : symbols) { input_symbols_.push_back(s.GetHandle()); } return *this; } /*! - * \brief create a Symbol from the current operator - * \param name the name of the operator - * \return the operator Symbol - */ - Symbol CreateSymbol(const std::string &name = ""); + * \brief create a Symbol from the current operator + * \param name the name of the operator + * \return the operator Symbol + */ + Symbol CreateSymbol(const std::string& name = ""); /*! - * \brief add an input ndarray - * \param name name of the input ndarray - * \param ndarray the input ndarray - * \return reference of self - */ - Operator &SetInput(const std::string &name, const NDArray &ndarray); - /*! - * \brief add an input ndarray - * \param ndarray the input ndarray - */ - template - Operator &PushInput(const NDArray &ndarray) { + * \brief add an input ndarray + * \param name name of the input ndarray + * \param ndarray the input ndarray + * \return reference of self + */ + Operator& SetInput(const std::string& name, const NDArray& ndarray); + /*! + * \brief add an input ndarray + * \param ndarray the input ndarray + */ + template + Operator& PushInput(const NDArray& ndarray) { input_ndarrays_.push_back(ndarray.GetHandle()); return *this; } /*! - * \brief add positional inputs - */ + * \brief add positional inputs + */ template - Operator &PushInput(const T &t, Args... args) { + Operator& PushInput(const T& t, Args... args) { SetParam(N, t); - PushInput(args...); + PushInput(args...); return *this; } /*! - * \brief add the last positional input - */ + * \brief add the last positional input + */ template - Operator &PushInput(const T &t) { + Operator& PushInput(const T& t) { SetParam(N, t); return *this; } /*! 
- * \brief add input ndarrays
- * \param ndarray the input ndarray
- * \return reference of self
- */
- Operator &operator()(const NDArray &ndarray) {
+ * \brief add input ndarrays
+ * \param ndarray the input ndarray
+ * \return reference of self
+ */
+ Operator& operator()(const NDArray& ndarray) {
 input_ndarrays_.push_back(ndarray.GetHandle());
 return *this;
 }
 /*!
- * \brief add a list of input ndarrays
- * \param ndarrays the vector of the input ndarrays
- * \return reference of self
- */
- Operator &operator()(const std::vector<NDArray> &ndarrays) {
- for (auto &s : ndarrays) {
+ * \brief add a list of input ndarrays
+ * \param ndarrays the vector of the input ndarrays
+ * \return reference of self
+ */
+ Operator& operator()(const std::vector<NDArray>& ndarrays) {
+ for (auto& s : ndarrays) {
 input_ndarrays_.push_back(s.GetHandle());
 }
 return *this;
 }
 /*!
- * \brief add input ndarrays
- * \return reference of self
- */
+ * \brief add input ndarrays
+ * \return reference of self
+ */
 template <typename... Args>
- Operator &operator()(Args... args) {
+ Operator& operator()(Args... args) {
 PushInput(args...);
 return *this;
 }
 std::vector<NDArray> Invoke();
- void Invoke(NDArray &output);
- void Invoke(std::vector<NDArray> &outputs);
+ void Invoke(NDArray& output);
+ void Invoke(std::vector<NDArray>& outputs);

 private:
 std::map<std::string, std::string> params_desc_;
diff --git a/cpp-package/include/mxnet-cpp/optimizer.h b/cpp-package/include/mxnet-cpp/optimizer.h
index 118c10ae12d9..b853703c5f6b 100644
--- a/cpp-package/include/mxnet-cpp/optimizer.h
+++ b/cpp-package/include/mxnet-cpp/optimizer.h
@@ -18,10 +18,10 @@
  */
 /*!
-* \file optimizer.h
-* \brief definition of optimizer
-* \author Chuntao Hong, Zhang Chen
-*/
+ * \file optimizer.h
+ * \brief definition of optimizer
+ * \author Chuntao Hong, Zhang Chen
+ */
 #ifndef MXNET_CPP_OPTIMIZER_H_
 #define MXNET_CPP_OPTIMIZER_H_
@@ -42,32 +42,32 @@ namespace mxnet {
 namespace cpp {
 /*!
-* \brief Optimizer interface
-*/
+ * \brief Optimizer interface
+ */
 class Optimizer {
  public:
 /*!
- * \brief constructor
- * \param beign_num_update The initial number of updates
- */
+ * \brief constructor
+ * \param begin_num_update The initial number of updates
+ */
 explicit Optimizer(unsigned begin_num_update);
 /*!
- * \brief get optimizer type
- * \return string of optimizer type
- */
+ * \brief get optimizer type
+ * \return string of optimizer type
+ */
 virtual std::string GetType() const = 0;
 /*!
- * \brief destructor
- */
+ * \brief destructor
+ */
 virtual ~Optimizer();
 /*!
- * \brief set config parameters
- * \param name name of the config parameter
- * \param value value of the config parameter
- * \return reference of self
- */
+ * \brief set config parameters
+ * \param name name of the config parameter
+ * \param value value of the config parameter
+ * \return reference of self
+ */
 template <typename T>
- Optimizer *SetParam(const std::string &name, const T &value) {
+ Optimizer* SetParam(const std::string& name, const T& value) {
 std::string value_str;
 std::stringstream ss;
 ss << value;
@@ -77,22 +77,22 @@ class Optimizer {
 return this;
 }
 /*!
- * \bried set the lr scheduler
- * \param lrScheduler lr scheduler used for this optimizer
- * \return reference if self
- */
- Optimizer *SetLRScheduler(std::unique_ptr<LRScheduler> lrScheduler) {
+ * \brief set the lr scheduler
+ * \param lrScheduler lr scheduler used for this optimizer
+ * \return reference of self
+ */
+ Optimizer* SetLRScheduler(std::unique_ptr<LRScheduler> lrScheduler) {
 CHECK(lrScheduler);
 lrScheduler_ = std::move(lrScheduler);
 lrScheduler_->SetLR(dmlc::stof(params_["lr"]));
 return this;
 }
 /*!
- * \brief Update a weight with gradient. - * \param index the unique index for the weight. - * \param weight the weight to update. - * \param grad gradient for the weight. - */ + * \brief Update a weight with gradient. + * \param index the unique index for the weight. + * \param weight the weight to update. + * \param grad gradient for the weight. + */ virtual void Update(int index, NDArray weight, NDArray grad) = 0; // TODO(zhangcheng-qinyinghua) // implement Update a list of arrays, maybe in the form of map @@ -100,9 +100,9 @@ class Optimizer { // grad, mx_float lr); /*! - * \brief Serialize the optimizer parameters to a string. - * \return serialization - */ + * \brief Serialize the optimizer parameters to a string. + * \return serialization + */ std::string Serialize() const; protected: @@ -125,19 +125,21 @@ class OptimizerRegistry { public: static Optimizer* Find(const std::string& name); static int __REGISTER__(const std::string& name, OptimizerCreator creator); + private: static std::map& cmap(); - OptimizerRegistry() = delete; + OptimizerRegistry() = delete; ~OptimizerRegistry() = delete; }; -#define MXNETCPP_REGISTER_OPTIMIZER(Name, OptimizerType)\ - OptimizerRegistry::__REGISTER__(#Name, [](){return new OptimizerType();}) +#define MXNETCPP_REGISTER_OPTIMIZER(Name, OptimizerType) \ + OptimizerRegistry::__REGISTER__(#Name, []() { return new OptimizerType(); }) class SGDOptimizer : public Optimizer { public: explicit SGDOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~SGDOptimizer(); void CreateState_(int index, NDArray weight) override; @@ -151,6 +153,7 @@ class SignumOptimizer : public Optimizer { explicit SignumOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~SignumOptimizer(); void CreateState_(int index, NDArray weight) override; @@ -159,12 +162,12 @@ class SignumOptimizer : public Optimizer { AtomicSymbolCreator mom_update_handle_; }; - class RMSPropOptimizer : public Optimizer { public: explicit RMSPropOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~RMSPropOptimizer(); void CreateState_(int index, NDArray weight) override; @@ -178,6 +181,7 @@ class AdamOptimizer : public Optimizer { explicit AdamOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~AdamOptimizer(); void CreateState_(int index, NDArray weight) override; @@ -191,6 +195,7 @@ class AdaGradOptimizer : public Optimizer { explicit AdaGradOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~AdaGradOptimizer(); void CreateState_(int index, NDArray weight) override; @@ -202,6 +207,7 @@ class AdaDeltaOptimizer : public Optimizer { explicit AdaDeltaOptimizer(unsigned begin_num_update = 0); std::string GetType() const override; void Update(int index, NDArray weight, NDArray grad) override; + private: virtual ~AdaDeltaOptimizer(); void CreateState_(int index, NDArray weight) override; diff --git a/cpp-package/include/mxnet-cpp/shape.h b/cpp-package/include/mxnet-cpp/shape.h index 44a10828a366..6d70862a09c3 100644 --- a/cpp-package/include/mxnet-cpp/shape.h +++ 
b/cpp-package/include/mxnet-cpp/shape.h
@@ -18,10 +18,10 @@
  */
 /*!
-* \file shape.h
-* \brief definition of shape
-* \author Chuntao Hong, Zhang Chen
-*/
+ * \file shape.h
+ * \brief definition of shape
+ * \author Chuntao Hong, Zhang Chen
+ */
 #ifndef MXNET_CPP_SHAPE_H_
 #define MXNET_CPP_SHAPE_H_
@@ -36,167 +36,155 @@ namespace mxnet {
 namespace cpp {
 /*!
-* \brief dynamic shape class that can hold shape
-* of arbirary dimension
-*/
+ * \brief dynamic shape class that can hold shape
+ * of arbitrary dimension
+ */
 struct Shape {
  public:
 /*! \brief constructor */
- Shape()
- : ndim_(0),
- num_heap_allocated_(0),
- data_heap_(nullptr) {}
+ Shape() : ndim_(0), num_heap_allocated_(0), data_heap_(nullptr) {}
 /*!
- * \brief constructor from a vector of index_t
- * \param v the vector
- */
- explicit Shape(const std::vector<index_t> &v)
- : ndim_(v.size()) {
+ * \brief constructor from a vector of index_t
+ * \param v the vector
+ */
+ explicit Shape(const std::vector<index_t>& v) : ndim_(v.size()) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
 std::copy(v.begin(), v.end(), data_stack_);
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
 std::copy(v.begin(), v.end(), data_heap_);
 }
 }
 /*!
- * \brief constructor one dimmension shape
- * \param s1 size of the first dimmension
- */
- explicit Shape(index_t s1)
- : ndim_(1) {
+ * \brief constructor one dimension shape
+ * \param s1 size of the first dimension
+ */
+ explicit Shape(index_t s1) : ndim_(1) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
- data_stack_[0] = s1;
+ data_stack_[0] = s1;
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
- data_heap_[0] = s1;
+ data_heap_[0] = s1;
 }
 }
 /*!
- * \brief constructor two dimmension shape
- * \param s1 size of the first dimmension
- * \param s2 size of the second dimmension
- */
- Shape(index_t s1, index_t s2)
- : ndim_(2) {
+ * \brief constructor two dimension shape
+ * \param s1 size of the first dimension
+ * \param s2 size of the second dimension
+ */
+ Shape(index_t s1, index_t s2) : ndim_(2) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
- data_stack_[0] = s1;
- data_stack_[1] = s2;
+ data_stack_[0] = s1;
+ data_stack_[1] = s2;
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
- data_heap_[0] = s1;
- data_heap_[1] = s2;
+ data_heap_[0] = s1;
+ data_heap_[1] = s2;
 }
 }
 /*!
- * \brief constructor three dimmension shape
- * \param s1 size of the first dimmension
- * \param s2 size of the second dimmension
- * \param s3 size of the third dimmension
- */
- Shape(index_t s1, index_t s2, index_t s3)
- : ndim_(3) {
+ * \brief constructor three dimension shape
+ * \param s1 size of the first dimension
+ * \param s2 size of the second dimension
+ * \param s3 size of the third dimension
+ */
+ Shape(index_t s1, index_t s2, index_t s3) : ndim_(3) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
- data_stack_[0] = s1;
- data_stack_[1] = s2;
- data_stack_[2] = s3;
+ data_stack_[0] = s1;
+ data_stack_[1] = s2;
+ data_stack_[2] = s3;
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
- data_heap_[0] = s1;
- data_heap_[1] = s2;
- data_heap_[2] = s3;
+ data_heap_[0] = s1;
+ data_heap_[1] = s2;
+ data_heap_[2] = s3;
 }
 }
 /*!
- * \brief constructor four dimmension shape
- * \param s1 size of the first dimmension
- * \param s2 size of the second dimmension
- * \param s3 size of the third dimmension
- * \param s4 size of the fourth dimmension
- */
- Shape(index_t s1, index_t s2, index_t s3, index_t s4)
- : ndim_(4) {
+ * \brief constructor four dimension shape
+ * \param s1 size of the first dimension
+ * \param s2 size of the second dimension
+ * \param s3 size of the third dimension
+ * \param s4 size of the fourth dimension
+ */
+ Shape(index_t s1, index_t s2, index_t s3, index_t s4) : ndim_(4) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
- data_stack_[0] = s1;
- data_stack_[1] = s2;
- data_stack_[2] = s3;
- data_stack_[3] = s4;
+ data_stack_[0] = s1;
+ data_stack_[1] = s2;
+ data_stack_[2] = s3;
+ data_stack_[3] = s4;
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
- data_heap_[0] = s1;
- data_heap_[1] = s2;
- data_heap_[2] = s3;
- data_heap_[3] = s4;
+ data_heap_[0] = s1;
+ data_heap_[1] = s2;
+ data_heap_[2] = s3;
+ data_heap_[3] = s4;
 }
 }
 /*!
- * \brief constructor five dimmension shape
- * \param s1 size of the first dimmension
- * \param s2 size of the second dimmension
- * \param s3 size of the third dimmension
- * \param s4 size of the fourth dimmension
- * \param s5 size of the fifth dimmension
- */
- Shape(index_t s1, index_t s2, index_t s3, index_t s4, index_t s5)
- : ndim_(5) {
+ * \brief constructor five dimension shape
+ * \param s1 size of the first dimension
+ * \param s2 size of the second dimension
+ * \param s3 size of the third dimension
+ * \param s4 size of the fourth dimension
+ * \param s5 size of the fifth dimension
+ */
+ Shape(index_t s1, index_t s2, index_t s3, index_t s4, index_t s5) : ndim_(5) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
- data_stack_[0] = s1;
- data_stack_[1] = s2;
- data_stack_[2] = s3;
- data_stack_[3] = s4;
- data_stack_[4] = s5;
+ data_stack_[0] = s1;
+ data_stack_[1] = s2;
+ data_stack_[2] = s3;
+ data_stack_[3] = s4;
+ data_stack_[4] = s5;
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
- data_heap_[0] = s1;
- data_heap_[1] = s2;
- data_heap_[2] = s3;
- data_heap_[3] = s4;
- data_heap_[4] = s5;
+ data_heap_[0] = s1;
+ data_heap_[1] = s2;
+ data_heap_[2] = s3;
+ data_heap_[3] = s4;
+ data_heap_[4] = s5;
 }
 }
 /*!
- * \brief constructor from Shape
- * \param s the source shape
- */
- Shape(const Shape &s)
- : ndim_(s.ndim_) {
+ * \brief constructor from Shape
+ * \param s the source shape
+ */
+ Shape(const Shape& s) : ndim_(s.ndim_) {
 if (ndim_ <= kStackCache) {
- data_heap_ = nullptr;
+ data_heap_ = nullptr;
 num_heap_allocated_ = 0;
 std::copy(s.data_stack_, s.data_stack_ + ndim_, data_stack_);
 } else {
- data_heap_ = new index_t[ndim_];
+ data_heap_ = new index_t[ndim_];
 num_heap_allocated_ = ndim_;
 std::copy(s.data_heap_, s.data_heap_ + ndim_, data_heap_);
 }
 }
 #if MSHADOW_IN_CXX11
 /*!
- * \brief move constructor from Shape
- * \param s the source shape
- */
- Shape(Shape &&s)
- : ndim_(s.ndim_),
- num_heap_allocated_(s.num_heap_allocated_),
- data_heap_(s.data_heap_) {
+ * \brief move constructor from Shape
+ * \param s the source shape
+ */
+ Shape(Shape&& s)
+ : ndim_(s.ndim_), num_heap_allocated_(s.num_heap_allocated_), data_heap_(s.data_heap_) {
 if (ndim_ <= kStackCache) {
 std::copy(s.data_stack_, s.data_stack_ + ndim_, data_stack_);
 }
@@ -210,43 +198,42 @@ struct Shape {
 delete[] data_heap_;
 }
 /*!
- * \brief copy shape from content betwen two iterators
- * \param begin the beginning of iterator
- * \param end the end of the iterator
- * \tparam RandomAccessIterator iterator type
- */
- template <typename RandomAccessIterator>
- inline void CopyFrom(RandomAccessIterator begin,
- RandomAccessIterator end) {
+ * \brief copy shape from content between two iterators
+ * \param begin the beginning of iterator
+ * \param end the end of the iterator
+ * \tparam RandomAccessIterator iterator type
+ */
+ template <typename RandomAccessIterator>
+ inline void CopyFrom(RandomAccessIterator begin, RandomAccessIterator end) {
 this->SetDim(end - begin);
 std::copy(begin, end, data());
 }
 /*!
- * \brief assignment from shape
- * \param shape source shape
- * \return reference of self
- */
- inline Shape &operator=(const Shape &shape) {
+ * \brief assignment from shape
+ * \param shape source shape
+ * \return reference of self
+ */
+ inline Shape& operator=(const Shape& shape) {
 this->SetDim(shape.ndim_);
- const index_t *src = shape.data();
+ const index_t* src = shape.data();
 std::copy(src, src + ndim_, data());
 return *this;
 }
 /*!
- * \brief assignment from vector
- * \param shape source shape
- * \return reference of self
- */
- inline Shape &operator=(const std::vector<index_t> &shape) {
+ * \brief assignment from vector
+ * \param shape source shape
+ * \return reference of self
+ */
+ inline Shape& operator=(const std::vector<index_t>& shape) {
 this->CopyFrom(shape.begin(), shape.end());
 return *this;
 }
 /*! \return the data content of the shape */
- inline const index_t *data() const {
+ inline const index_t* data() const {
 return ndim_ <= kStackCache ? data_stack_ : data_heap_;
 }
 /*! \return the data content of the shape */
- inline index_t *data() {
+ inline index_t* data() {
 return ndim_ <= kStackCache ? data_stack_ : data_heap_;
 }
 /*! \brief return number of dimension of the tensor inside */
@@ -254,57 +241,60 @@ struct Shape {
 return ndim_;
 }
 /*!
- * \brief get corresponding index
- * \param i dimension index
- * \return the corresponding dimension size
- */
- inline index_t &operator[](index_t i) {
+ * \brief get corresponding index
+ * \param i dimension index
+ * \return the corresponding dimension size
+ */
+ inline index_t& operator[](index_t i) {
 return data()[i];
 }
 /*!
- * \brief get corresponding index - * \param i dimension index - * \return the corresponding dimension size - */ - inline const index_t &operator[](index_t i) const { + * \brief get corresponding index + * \param i dimension index + * \return the corresponding dimension size + */ + inline const index_t& operator[](index_t i) const { return data()[i]; } /*! \brief total number of elements in the tensor */ inline size_t Size(void) const { - size_t size = 1; - const index_t *d = this->data(); + size_t size = 1; + const index_t* d = this->data(); for (index_t i = 0; i < ndim_; ++i) { size *= d[i]; } return size; } /*! - * \return whether two shape equals - * \param s the shape to compare against - */ - inline bool operator==(const Shape &s) const { - if (ndim_ != s.ndim_) return false; + * \return whether two shape equals + * \param s the shape to compare against + */ + inline bool operator==(const Shape& s) const { + if (ndim_ != s.ndim_) + return false; if (ndim_ <= kStackCache) { for (index_t i = 0; i < ndim_; ++i) { - if (data_stack_[i] != s.data_stack_[i]) return false; + if (data_stack_[i] != s.data_stack_[i]) + return false; } } else { for (index_t i = 0; i < ndim_; ++i) { - if (data_heap_[i] != s.data_heap_[i]) return false; + if (data_heap_[i] != s.data_heap_[i]) + return false; } } return true; } /*! - * \return whether two shape not equals - * \param s the shape to compare against - */ - inline bool operator!=(const Shape &s) const { + * \return whether two shape not equals + * \param s the shape to compare against + */ + inline bool operator!=(const Shape& s) const { return !(*this == s); } - friend std::ostream &operator<<(std::ostream &os, const Shape &shape); - friend std::istream &operator>>(std::istream &is, Shape &shape); + friend std::ostream& operator<<(std::ostream& os, const Shape& shape); + friend std::istream& operator>>(std::istream& is, Shape& shape); private: // the shape will be stored in data_stack_ @@ -319,17 +309,16 @@ struct Shape { /*! \brief in stack space used to store shape when it is small */ index_t data_stack_[kStackCache]; /*! \brief space to store shape when dimension is big*/ - index_t *data_heap_; + index_t* data_heap_; /*! - * \brief internal function to set the dimension - * \param dim the dimension of the shape - */ + * \brief internal function to set the dimension + * \param dim the dimension of the shape + */ inline void SetDim(index_t dim) { - if (dim > kStackCache && - dim > num_heap_allocated_) { + if (dim > kStackCache && dim > num_heap_allocated_) { // data_heap_ can be nullptr delete[] data_heap_; - data_heap_ = new index_t[dim]; + data_heap_ = new index_t[dim]; num_heap_allocated_ = dim; } ndim_ = dim; @@ -337,34 +326,37 @@ struct Shape { }; /*! -* \brief allow string printing of the shape -* \param os the output stream -* \param shape the shape -* \return the ostream -*/ -inline std::ostream &operator<<(std::ostream &os, const Shape &shape) { + * \brief allow string printing of the shape + * \param os the output stream + * \param shape the shape + * \return the ostream + */ +inline std::ostream& operator<<(std::ostream& os, const Shape& shape) { os << '('; for (index_t i = 0; i < shape.ndim(); ++i) { - if (i != 0) os << ','; + if (i != 0) + os << ','; os << static_cast(shape[i]); // Supports negative Shape 'special codes' for inferring } // python style tuple - if (shape.ndim() == 1) os << ','; + if (shape.ndim() == 1) + os << ','; os << ')'; return os; } /*! 
-* \brief read shape from the istream -* \param is the input stream -* \param shape the shape -* \return the istream -*/ -inline std::istream &operator>>(std::istream &is, Shape &shape) { + * \brief read shape from the istream + * \param is the input stream + * \param shape the shape + * \return the istream + */ +inline std::istream& operator>>(std::istream& is, Shape& shape) { // get ( while (true) { char ch = is.get(); - if (ch == '(') break; + if (ch == '(') + break; if (!isspace(ch)) { is.setstate(std::ios::failbit); return is; @@ -382,14 +374,17 @@ inline std::istream &operator>>(std::istream &is, Shape &shape) { while (true) { ch = is.peek(); if (isspace(ch)) { - is.get(); continue; + is.get(); + continue; } if (ch == ')') { - is.get(); break; + is.get(); + break; } break; } - if (ch == ')') break; + if (ch == ')') + break; } else if (ch == ')') { break; } else { diff --git a/cpp-package/include/mxnet-cpp/symbol.h b/cpp-package/include/mxnet-cpp/symbol.h index 8e94637908be..6d9e57471154 100644 --- a/cpp-package/include/mxnet-cpp/symbol.h +++ b/cpp-package/include/mxnet-cpp/symbol.h @@ -18,10 +18,10 @@ */ /*! -* \file symbol.h -* \brief definition of symbol -* \author Chuntao Hong, Zhang Chen -*/ + * \file symbol.h + * \brief definition of symbol + * \author Chuntao Hong, Zhang Chen + */ #ifndef MXNET_CPP_SYMBOL_H_ #define MXNET_CPP_SYMBOL_H_ @@ -39,58 +39,60 @@ namespace cpp { class Executor; /*! -* \brief struct to store SymbolHandle -*/ + * \brief struct to store SymbolHandle + */ struct SymBlob { public: /*! - * \brief default constructor - */ + * \brief default constructor + */ SymBlob() : handle_(nullptr) {} /*! - * \brief construct with SymbolHandle to store - */ + * \brief construct with SymbolHandle to store + */ explicit SymBlob(SymbolHandle handle) : handle_(handle) {} /*! - * \brief destructor, free the SymbolHandle - */ - ~SymBlob() { MXSymbolFree(handle_); } + * \brief destructor, free the SymbolHandle + */ + ~SymBlob() { + MXSymbolFree(handle_); + } /*! - * \brief the SymbolHandle to store - */ + * \brief the SymbolHandle to store + */ SymbolHandle handle_; private: - SymBlob(const SymBlob &); - SymBlob &operator=(const SymBlob &); + SymBlob(const SymBlob&); + SymBlob& operator=(const SymBlob&); }; /*! -* \brief Symbol interface -*/ + * \brief Symbol interface + */ class Symbol { public: Symbol() {} /*! - * \brief construct a Symbol with SymbolHandle - * \param handle the given SymbolHandle - */ + * \brief construct a Symbol with SymbolHandle + * \param handle the given SymbolHandle + */ explicit Symbol(SymbolHandle handle); /*! - * \brief construct a variable Symbol - * \param name the name of the variable - */ - explicit Symbol(const char *name); + * \brief construct a variable Symbol + * \param name the name of the variable + */ + explicit Symbol(const char* name); /*! 
- * \brief construct a variable Symbol
- * \param name the name of the variable
- */
- explicit Symbol(const std::string &name);
- Symbol operator+(const Symbol &rhs) const;
- Symbol operator-(const Symbol &rhs) const;
- Symbol operator*(const Symbol &rhs) const;
- Symbol operator/(const Symbol &rhs) const;
- Symbol operator%(const Symbol &rhs) const;
+ * \brief construct a variable Symbol
+ * \param name the name of the variable
+ */
+ explicit Symbol(const std::string& name);
+ Symbol operator+(const Symbol& rhs) const;
+ Symbol operator-(const Symbol& rhs) const;
+ Symbol operator*(const Symbol& rhs) const;
+ Symbol operator/(const Symbol& rhs) const;
+ Symbol operator%(const Symbol& rhs) const;
 Symbol operator+(mx_float scalar) const;
 Symbol operator-(mx_float scalar) const;
@@ -99,79 +101,81 @@ class Symbol {
 Symbol operator%(mx_float scalar) const;
 Symbol Copy() const;
 /*!
- * \brief construct a variable Symbol
- * \param name the name of the variable
- */
- static Symbol Variable(const std::string &name = "");
+ * \brief construct a variable Symbol
+ * \param name the name of the variable
+ */
+ static Symbol Variable(const std::string& name = "");
 Symbol operator[](int index);
- Symbol operator[](const std::string &index);
+ Symbol operator[](const std::string& index);
 /*!
- * \brief Create a symbol that groups symbols together
- * \param symbols List of symbols to be groupe
- */
- static Symbol Group(const std::vector<Symbol> &symbols);
+ * \brief Create a symbol that groups symbols together
+ * \param symbols List of symbols to be grouped
+ */
+ static Symbol Group(const std::vector<Symbol>& symbols);
 /*!
- * \brief load Symbol from a JSON file
- * \param file_name the name of the file
- */
- static Symbol Load(const std::string &file_name);
+ * \brief load Symbol from a JSON file
+ * \param file_name the name of the file
+ */
+ static Symbol Load(const std::string& file_name);
 /*!
- * \brief load Symbol from a JSON string
- * \param json_str the JSON string
- */
- static Symbol LoadJSON(const std::string &json_str);
+ * \brief load Symbol from a JSON string
+ * \param json_str the JSON string
+ */
+ static Symbol LoadJSON(const std::string& json_str);
 /*!
- * \brief save Symbol to a file
- * \param file_name the name of the file
- */
- void Save(const std::string &file_name) const;
+ * \brief save Symbol to a file
+ * \param file_name the name of the file
+ */
+ void Save(const std::string& file_name) const;
 /*!
- * \brief save Symbol into a JSON string
- */
+ * \brief save Symbol into a JSON string
+ */
 std::string ToJSON() const;
 /*!
- * \brief save Symbol into a JSON string
- * \retutrn the symbol whose outputs are all the internals.
- */
+ * \brief get the internals of the symbol
+ * \return the symbol whose outputs are all the internals.
+ */
 Symbol GetInternals() const;
 /*!
- * \return the SymbolHandle
- */
- SymbolHandle GetHandle() const { return (blob_ptr_) ? blob_ptr_->handle_: nullptr; }
+ * \return the SymbolHandle
+ */
+ SymbolHandle GetHandle() const {
+ return (blob_ptr_) ? blob_ptr_->handle_ : nullptr;
+ }
 /*!
- * \brief construct an operator Symbol, with given input Symbol and config
- * \param name the name of the Symbol
- * \param input_keys the vector of keys of the input
- * \param input_values the vector of the intput Symbols
- * \param config_keys the vector of keys of the config
- * \param config_values the vecotr of values of the config
- */
- Symbol(const std::string &operator_name, const std::string &name,
- std::vector<const char *> input_keys,
+ * \brief construct an operator Symbol, with given input Symbol and config
+ * \param name the name of the Symbol
+ * \param input_keys the vector of keys of the input
+ * \param input_values the vector of the input Symbols
+ * \param config_keys the vector of keys of the config
+ * \param config_values the vector of values of the config
+ */
+ Symbol(const std::string& operator_name,
+ const std::string& name,
+ std::vector<const char*> input_keys,
 std::vector<SymbolHandle> input_values,
- std::vector<const char *> config_keys,
- std::vector<const char *> config_values);
+ std::vector<const char*> config_keys,
+ std::vector<const char*> config_values);
 /*!
- * \brief infer the shapes by providing shapes of known argument shapes.
- * \param arg_shapes map of argument name to shape of arguments with known
- * shapes.
- * \param in_shapes used to store infered shapes of input arguments.
- * \param out_shapes used to store infered shapes of outputs.
- * \param aux_shapes use to store the infered shapes of auxiliary states
- */
- void InferShape(
- const std::map<std::string, std::vector<mx_uint> > &arg_shapes,
- std::vector<std::vector<mx_uint> > *in_shape,
- std::vector<std::vector<mx_uint> > *aux_shape,
- std::vector<std::vector<mx_uint> > *out_shape) const;
+ * \brief infer the shapes by providing shapes of known argument shapes.
+ * \param arg_shapes map of argument name to shape of arguments with known
+ * shapes.
+ * \param in_shapes used to store inferred shapes of input arguments.
+ * \param out_shapes used to store inferred shapes of outputs.
+ * \param aux_shapes used to store the inferred shapes of auxiliary states
+ */
+ void InferShape(const std::map<std::string, std::vector<mx_uint> >& arg_shapes,
+ std::vector<std::vector<mx_uint> >* in_shape,
+ std::vector<std::vector<mx_uint> >* aux_shape,
+ std::vector<std::vector<mx_uint> >* out_shape) const;
 /*!
- * \brief List the arguments names.
- *
- * The position of the returned list also corresponds to calling position in
- *operator()
- * \return the arguments list of this symbol, they can be either named or
- *unnamed (empty string).
- */
+ * \brief List the arguments names.
+ *
+ * The position of the returned list also corresponds to calling position in
+ *operator()
+ * \return the arguments list of this symbol, they can be either named or
+ *unnamed (empty string).
+ */
 std::vector<std::string> ListArguments() const;
 /*! \return lists all argument names and aux states of the symbol */
 std::vector<std::string> ListInputs() const;
@@ -199,101 +203,99 @@ class Symbol {
 /*! \return get the name of the symbol */
 std::string GetName() const;
 /*!
- * \brief infer and construct all the arrays to bind to executor by providing
- * some known arrays.
- * \param context the context of all the infered arrays
- * \param arg_arrays infered input arguments arrays.
- * \param arad_arrays infered arrays to store the gradient output of the input
- * arguments.
- * \param aux_arrays infered arrays that is used as internal state in op.
- * \param args_map map of some given arguments arrays.
- * \param args_grad_store map of some gradient given store arrays.
- * \param args_req_type map of some given type of gradient saving. Can only be
- * in {kNullOp, kAddTo, kWriteTo}.
- * \param aux_map NDArray that stores the internal state in op
- */
+ * \brief infer and construct all the arrays to bind to executor by providing
+ * some known arrays.
+ * \param context the context of all the inferred arrays
+ * \param arg_arrays inferred input arguments arrays.
+ * \param grad_arrays inferred arrays to store the gradient output of the input
+ * arguments.
+ * \param aux_arrays inferred arrays that are used as internal state in op.
+ * \param args_map map of some given arguments arrays.
+ * \param args_grad_store map of some given gradient store arrays.
+ * \param args_req_type map of some given type of gradient saving. Can only be
+ * in {kNullOp, kAddTo, kWriteTo}.
+ * \param aux_map NDArray that stores the internal state in op
+ */
 void InferExecutorArrays(
- const Context &context, std::vector<NDArray> *arg_arrays,
- std::vector<NDArray> *grad_arrays, std::vector<OpReqType> *grad_reqs,
- std::vector<NDArray> *aux_arrays,
- const std::map<std::string, NDArray> &args_map,
- const std::map<std::string, NDArray> &arg_grad_store =
- std::map<std::string, NDArray>(),
- const std::map<std::string, OpReqType> &grad_req_type =
- std::map<std::string, OpReqType>(),
- const std::map<std::string, NDArray> &aux_map =
- std::map<std::string, NDArray>()) const;
+ const Context& context,
+ std::vector<NDArray>* arg_arrays,
+ std::vector<NDArray>* grad_arrays,
+ std::vector<OpReqType>* grad_reqs,
+ std::vector<NDArray>* aux_arrays,
+ const std::map<std::string, NDArray>& args_map,
+ const std::map<std::string, NDArray>& arg_grad_store = std::map<std::string, NDArray>(),
+ const std::map<std::string, OpReqType>& grad_req_type = std::map<std::string, OpReqType>(),
+ const std::map<std::string, NDArray>& aux_map = std::map<std::string, NDArray>()) const;
 /*!
- * \brief infer and construct all the input arguments arrays to bind to
- * executor by providing some known arguments arrays.
- * \param context the context of all the infered arrays.
- * \param args_map map of all the infered input arguments arrays.
- * \param known_args map of some given arguments arrays.
- */
- void InferArgsMap(const Context &context,
- std::map<std::string, NDArray> *args_map,
- const std::map<std::string, NDArray> &known_args) const;
+ * \brief infer and construct all the input arguments arrays to bind to
+ * executor by providing some known arguments arrays.
+ * \param context the context of all the inferred arrays.
+ * \param args_map map of all the inferred input arguments arrays.
+ * \param known_args map of some given arguments arrays.
+ */
+ void InferArgsMap(const Context& context,
+ std::map<std::string, NDArray>* args_map,
+ const std::map<std::string, NDArray>& known_args) const;
 /*!
- * \brief Create an executor by bind symbol with context and arguments.
- * If user do not want to compute the gradients of i-th argument,
- *grad_req_type[i] can be kNullOp.
- * The input arrays in the given maps should have the same name with the input
- *symbol.
- * Only need some of the necessary arrays, and the other arrays can be infered
- *automatically.
- *
- * \param context the context of binding.
- * \param args_map the NDArray that stores the input arguments to the symbol.
- * \param arg_grad_store NDArray that is used to store the gradient output of
- *the input arguments.
- * \param grad_req_type requirment type of gradient saving. Can only be in
- *{kNullOp, kAddTo, kWriteTo}.
- * \param aux_map NDArray that stores the internal state in op
- * \return a new executor, which need to be free manually.
- */
- Executor *SimpleBind(const Context &context,
- const std::map<std::string, NDArray> &args_map,
- const std::map<std::string, NDArray> &arg_grad_store =
- std::map<std::string, NDArray>(),
- const std::map<std::string, OpReqType> &grad_req_type =
- std::map<std::string, OpReqType>(),
- const std::map<std::string, NDArray> &aux_map =
- std::map<std::string, NDArray>());
+ * \brief Create an executor by binding the symbol with context and arguments.
+ * If the user does not want to compute the gradients of the i-th argument,
+ *grad_req_type[i] can be kNullOp.
+ * The input arrays in the given maps should have the same names as the input
+ *symbol.
+ * Only some of the necessary arrays are needed; the other arrays can be inferred
+ *automatically.
+ *
+ * \param context the context of binding.
+ * \param args_map the NDArray that stores the input arguments to the symbol.
+ * \param arg_grad_store NDArray that is used to store the gradient output of
+ *the input arguments.
+ * \param grad_req_type requirement type of gradient saving. Can only be in
+ *{kNullOp, kAddTo, kWriteTo}.
+ * \param aux_map NDArray that stores the internal state in op
+ * \return a new executor, which needs to be freed manually.
+ */
+ Executor* SimpleBind(
+ const Context& context,
+ const std::map<std::string, NDArray>& args_map,
+ const std::map<std::string, NDArray>& arg_grad_store = std::map<std::string, NDArray>(),
+ const std::map<std::string, OpReqType>& grad_req_type = std::map<std::string, OpReqType>(),
+ const std::map<std::string, NDArray>& aux_map = std::map<std::string, NDArray>());
 /*!
- * \brief Create an executor by bind symbol with context and arguments.
- * If user do not want to compute the gradients of i-th argument,
- *grad_req_type[i] can be kNullOp.
- *
- * \param context the context of binding.
- * \param arg_arrays the NDArray that stores the input arguments to the symbol.
- * \param grad_arrays NDArray that is used to store the gradient output of the
- *input arguments.
- * \param grad_reqs requirment type of gradient saving. Can only be in
- *{kNullOp, kAddTo, kWriteTo}.
- * \param aux_arrays NDArray that is used as internal state in op
- * \param group_to_ctx dict of string to mx.Context
- * \param shared_exec Executor to share memory with. This is intended for
- *runtime reshaping, variable length sequencesn etc. The returned executor
- *shares state with shared_exec, and should not be used in parallel with it.
- * \return a new executor, which need to be free manually.
- */
- Executor *Bind(const Context &context, const std::vector<NDArray> &arg_arrays,
- const std::vector<NDArray> &grad_arrays,
- const std::vector<OpReqType> &grad_reqs,
- const std::vector<NDArray> &aux_arrays,
- const std::map<std::string, Context> &group_to_ctx =
- std::map<std::string, Context>(),
- Executor *shared_exec = nullptr);
+ * \brief Create an executor by binding the symbol with context and arguments.
+ * If the user does not want to compute the gradients of the i-th argument,
+ *grad_req_type[i] can be kNullOp.
+ *
+ * \param context the context of binding.
+ * \param arg_arrays the NDArray that stores the input arguments to the symbol.
+ * \param grad_arrays NDArray that is used to store the gradient output of the
+ *input arguments.
+ * \param grad_reqs requirement type of gradient saving. Can only be in
+ *{kNullOp, kAddTo, kWriteTo}.
+ * \param aux_arrays NDArray that is used as internal state in op
+ * \param group_to_ctx dict of string to mx.Context
+ * \param shared_exec Executor to share memory with. This is intended for
+ *runtime reshaping, variable length sequences, etc. The returned executor
+ *shares state with shared_exec, and should not be used in parallel with it.
+ * \return a new executor, which needs to be freed manually.
+ */ + Executor* Bind( + const Context& context, + const std::vector& arg_arrays, + const std::vector& grad_arrays, + const std::vector& grad_reqs, + const std::vector& aux_arrays, + const std::map& group_to_ctx = std::map(), + Executor* shared_exec = nullptr); private: std::shared_ptr blob_ptr_; static OpMap*& op_map(); }; -Symbol operator+(mx_float lhs, const Symbol &rhs); -Symbol operator-(mx_float lhs, const Symbol &rhs); -Symbol operator*(mx_float lhs, const Symbol &rhs); -Symbol operator/(mx_float lhs, const Symbol &rhs); -Symbol operator%(mx_float lhs, const Symbol &rhs); +Symbol operator+(mx_float lhs, const Symbol& rhs); +Symbol operator-(mx_float lhs, const Symbol& rhs); +Symbol operator*(mx_float lhs, const Symbol& rhs); +Symbol operator/(mx_float lhs, const Symbol& rhs); +Symbol operator%(mx_float lhs, const Symbol& rhs); } // namespace cpp } // namespace mxnet #endif // MXNET_CPP_SYMBOL_H_ From c89c0786f78552f9b8ee88cd463002b1bfec20a5 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:01:48 +0100 Subject: [PATCH 05/10] [EXAMPLE] Re-format .cc .h files --- example/extensions/lib_api/libtest.cc | 8 +- example/extensions/lib_custom_op/gemm_lib.cc | 68 +++---- example/extensions/lib_custom_op/relu_lib.cc | 67 ++++--- example/extensions/lib_custom_op/relu_lib.cu | 59 +++--- example/extensions/lib_custom_op/relu_lib.h | 44 ++--- .../lib_custom_op/transposecsr_lib.cc | 101 +++++----- .../lib_custom_op/transposerowsp_lib.cc | 98 +++++----- .../extensions/lib_external_ops/min_ex-inl.h | 19 +- example/extensions/lib_external_ops/min_ex.cc | 16 +- example/extensions/lib_external_ops/min_ex.cu | 3 +- example/extensions/lib_pass/pass_lib.cc | 5 +- .../extensions/lib_subgraph/subgraph_lib.cc | 181 +++++++++--------- 12 files changed, 339 insertions(+), 330 deletions(-) diff --git a/example/extensions/lib_api/libtest.cc b/example/extensions/lib_api/libtest.cc index 0b2c6f64789c..fda2d96bc767 100644 --- a/example/extensions/lib_api/libtest.cc +++ b/example/extensions/lib_api/libtest.cc @@ -42,8 +42,8 @@ int main(void) { HINSTANCE handle; handle = LoadLibrary(TEXT("libinit_lib.dll")); #else - void *handle; - handle = dlopen("libinit_lib.so", RTLD_LAZY); + void* handle; + handle = dlopen("libinit_lib.so", RTLD_LAZY); #endif if (!handle) { @@ -54,9 +54,9 @@ int main(void) { // get initialize function address from the library initialize_t init_lib; #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) - init_lib = (initialize_t) GetProcAddress(handle, MXLIB_INITIALIZE_STR); + init_lib = (initialize_t)GetProcAddress(handle, MXLIB_INITIALIZE_STR); #else - init_lib = (initialize_t) dlsym(handle, MXLIB_INITIALIZE_STR); + init_lib = (initialize_t)dlsym(handle, MXLIB_INITIALIZE_STR); #endif if (!init_lib) { diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index 4a6a337a91df..a4e518409ea7 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -30,14 +30,18 @@ using namespace mxnet::ext; // main matrix multiplication routine -void gemm(const float* A, const float* B, float* C, - const unsigned n, const unsigned k, const unsigned m) { +void gemm(const float* A, + const float* B, + float* C, + const unsigned n, + const unsigned k, + const unsigned m) { unsigned i, j, kk; for (i = 0; i < n; i++) { for (j = 0; j < m; j++) { - C[i*m+j] = 0; + C[i * m + j] = 0; for (kk = 0; kk < k; kk++) { - C[i*m+j] += A[i*k+kk] * B[kk*m+j]; + C[i * m + j] += A[i * k + kk] * B[kk * m + j]; 
} } } @@ -47,7 +51,7 @@ void transpose(const float* A, float* At, const unsigned n, const unsigned m) { unsigned i, j; for (i = 0; i < n; i++) { for (j = 0; j < m; j++) { - At[i*m+j] = A[j*n+i]; + At[i * m + j] = A[j * n + i]; } } } @@ -96,8 +100,8 @@ MXReturnValue backward(const std::unordered_map& attrs const OpResource& res) { // extract data pointers from tensors float* dC = inputs->at(0).data(); - float* A = inputs->at(1).data(); - float* B = inputs->at(2).data(); + float* A = inputs->at(1).data(); + float* B = inputs->at(2).data(); float* dA = outputs->at(0).data(); float* dB = outputs->at(1).data(); // set tensor shapes @@ -106,9 +110,9 @@ MXReturnValue backward(const std::unordered_map& attrs unsigned m = inputs->at(2).shape[1]; // allocate temporary workspace memory through resource manager // for multiple arrays better to request a big memory pool - void *workspace = res.alloc_cpu((k*n + m*k) * sizeof(float)); - float *At = static_cast(workspace); - float *Bt = static_cast(workspace) + (k*n); + void* workspace = res.alloc_cpu((k * n + m * k) * sizeof(float)); + float* At = static_cast(workspace); + float* Bt = static_cast(workspace) + (k * n); transpose(A, At, k, n); transpose(B, Bt, m, k); @@ -119,15 +123,16 @@ MXReturnValue backward(const std::unordered_map& attrs } MXReturnValue parseAttrs(const std::unordered_map& attrs, - int* num_in, int* num_out) { - *num_in = 2; + int* num_in, + int* num_out) { + *num_in = 2; *num_out = 1; return MX_SUCCESS; } MXReturnValue inferType(const std::unordered_map& attrs, - std::vector *intypes, - std::vector *outtypes) { + std::vector* intypes, + std::vector* outtypes) { // validate inputs if (intypes->size() != 2) { MX_ERROR_MSG << "Expected 2 inputs to inferType"; @@ -157,10 +162,10 @@ MXReturnValue inferShape(const std::unordered_map& att return MX_FAIL; } - unsigned n = inshapes->at(0)[0]; - unsigned k = inshapes->at(0)[1]; + unsigned n = inshapes->at(0)[0]; + unsigned k = inshapes->at(0)[1]; unsigned kk = inshapes->at(1)[0]; - unsigned m = inshapes->at(1)[1]; + unsigned m = inshapes->at(1)[1]; if (k != kk) { MX_ERROR_MSG << "Exected first input axis 1 equals to second input axis 0"; return MX_FAIL; @@ -171,24 +176,23 @@ MXReturnValue inferShape(const std::unordered_map& att } REGISTER_OP(my_gemm) -.setForward(forward, "cpu") -.setBackward(backward, "cpu") -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape); + .setForward(forward, "cpu") + .setBackward(backward, "cpu") + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ class MyStatefulGemm : public CustomStatefulOp { public: - explicit MyStatefulGemm(int count, - std::unordered_map attrs) - : count(count), attrs_(std::move(attrs)) {} + explicit MyStatefulGemm(int count, std::unordered_map attrs) + : count(count), attrs_(std::move(attrs)) {} ~MyStatefulGemm() override { std::cout << "Info: destructing MyStatefulGemm" << std::endl; } - + MXReturnValue Forward(std::vector* inputs, std::vector* outputs, const OpResource& op_res) override { @@ -209,7 +213,7 @@ class MyStatefulGemm : public CustomStatefulOp { MXReturnValue createOpState(const std::unordered_map& attrs, const MXContext& ctx, - const std::vector >& in_shapes, + const std::vector>& in_shapes, const std::vector in_types, CustomStatefulOp** op_inst) { // testing passing of keyword arguments @@ -227,11 +231,11 @@ MXReturnValue mutateInputs(const std::unordered_map& a } REGISTER_OP(state_gemm) 
-.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setMutateInputs(mutateInputs) -.setCreateOpState(createOpState, "cpu"); + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferShape(inferShape) + .setMutateInputs(mutateInputs) + .setCreateOpState(createOpState, "cpu"); MXReturnValue initialize(int version) { if (version >= 10700) { diff --git a/example/extensions/lib_custom_op/relu_lib.cc b/example/extensions/lib_custom_op/relu_lib.cc index 8bbb99f61a54..ff2041b9ff04 100644 --- a/example/extensions/lib_custom_op/relu_lib.cc +++ b/example/extensions/lib_custom_op/relu_lib.cc @@ -29,8 +29,9 @@ using namespace mxnet::ext; MXReturnValue parseAttrs(const std::unordered_map& attrs, - int* num_in, int* num_out) { - *num_in = 1; + int* num_in, + int* num_out) { + *num_in = 1; *num_out = 1; return MX_SUCCESS; } @@ -53,9 +54,9 @@ MXReturnValue forwardCPU(const std::unordered_map& att std::vector* inputs, std::vector* outputs, const OpResource& res) { - float* in_data = inputs->at(0).data(); + float* in_data = inputs->at(0).data(); float* out_data = outputs->at(0).data(); - for (int i=0; iat(0).size(); i++) { + for (int i = 0; i < inputs->at(0).size(); i++) { out_data[i] = in_data[i] > 0 ? in_data[i] : 0; } return MX_SUCCESS; @@ -66,26 +67,25 @@ MXReturnValue backwardCPU(const std::unordered_map& at std::vector* outputs, const OpResource& res) { float* out_grad = inputs->at(0).data(); - float* in_data = inputs->at(1).data(); - float* in_grad = outputs->at(0).data(); - for (int i=0; iat(1).size(); i++) { + float* in_data = inputs->at(1).data(); + float* in_grad = outputs->at(0).data(); + for (int i = 0; i < inputs->at(1).size(); i++) { in_grad[i] = in_data[i] > 0 ? 1 * out_grad[i] : 0; } return MX_SUCCESS; } REGISTER_OP(my_relu) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setForward(forwardCPU, "cpu") -.setForward(forwardGPU, "gpu") -.setBackward(backwardCPU, "cpu") -.setBackward(backwardGPU, "gpu"); - + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferShape(inferShape) + .setForward(forwardCPU, "cpu") + .setForward(forwardGPU, "gpu") + .setBackward(backwardCPU, "cpu") + .setBackward(backwardGPU, "gpu"); MyStatefulReluCPU::MyStatefulReluCPU(const std::unordered_map& attrs) - : attrs_(attrs) {} + : attrs_(attrs) {} MXReturnValue MyStatefulReluCPU::Forward(std::vector* inputs, std::vector* outputs, @@ -100,7 +100,7 @@ MXReturnValue MyStatefulReluCPU::Backward(std::vector* inputs, } MyStatefulReluGPU::MyStatefulReluGPU(const std::unordered_map& attrs) - : attrs_(attrs) {} + : attrs_(attrs) {} MXReturnValue MyStatefulReluGPU::Forward(std::vector* inputs, std::vector* outputs, @@ -114,10 +114,9 @@ MXReturnValue MyStatefulReluGPU::Backward(std::vector* inputs, return backwardGPU(attrs_, inputs, outputs, op_res); } - MXReturnValue createOpStateCPU(const std::unordered_map& attrs, const MXContext& ctx, - const std::vector >& in_shapes, + const std::vector>& in_shapes, const std::vector in_types, CustomStatefulOp** op_inst) { *op_inst = new MyStatefulReluCPU(attrs); @@ -126,7 +125,7 @@ MXReturnValue createOpStateCPU(const std::unordered_map& attrs, const MXContext& ctx, - const std::vector >& in_shapes, + const std::vector>& in_shapes, const std::vector in_types, CustomStatefulOp** op_inst) { *op_inst = new MyStatefulReluGPU(attrs); @@ -134,23 +133,23 @@ MXReturnValue createOpStateGPU(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, const OpResource& res) { - float* in_data = 
inputs->at(0).data(); + float* in_data = inputs->at(0).data(); float* out_data = outputs->at(0).data(); mx_cpu_rand_t* states = res.get_cpu_rand_states(); std::normal_distribution dist_normal; - for (int i=0; iat(0).size(); ++i) { + for (int i = 0; i < inputs->at(0).size(); ++i) { float noise = dist_normal(*states); out_data[i] = in_data[i] + noise > 0 ? in_data[i] + noise : 0; } @@ -158,13 +157,13 @@ MXReturnValue noisyForwardCPU(const std::unordered_map } REGISTER_OP(my_noisy_relu) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferShape(inferShape) -.setForward(noisyForwardCPU, "cpu") -.setForward(noisyForwardGPU, "gpu") -.setBackward(backwardCPU, "cpu") -.setBackward(backwardGPU, "gpu"); + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferShape(inferShape) + .setForward(noisyForwardCPU, "cpu") + .setForward(noisyForwardGPU, "gpu") + .setBackward(backwardCPU, "cpu") + .setBackward(backwardGPU, "gpu"); MXReturnValue initialize(int version) { if (version >= 20000) { diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index c309274e61c6..d9643cd68ab4 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -28,7 +28,7 @@ using namespace mxnet::ext; -__global__ void relu_gpu_forward(float *out, float *in, int64_t N) { +__global__ void relu_gpu_forward(float* out, float* in, int64_t N) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) out[tid] = in[tid] > 0 ? in[tid] : 0; @@ -38,19 +38,19 @@ MXReturnValue forwardGPU(const std::unordered_map& att std::vector* inputs, std::vector* outputs, const OpResource& res) { - float* in_data = inputs->at(0).data(); + float* in_data = inputs->at(0).data(); float* out_data = outputs->at(0).data(); mx_stream_t cuda_stream = res.get_cuda_stream(); - int64_t N = inputs->at(0).size(); - int num_block = (N + NumThreadPerBlock - 1) / NumThreadPerBlock; + int64_t N = inputs->at(0).size(); + int num_block = (N + NumThreadPerBlock - 1) / NumThreadPerBlock; - relu_gpu_forward<<>>(out_data, in_data, N); + relu_gpu_forward<<>>(out_data, in_data, N); return MX_SUCCESS; } -__global__ void relu_gpu_backward(float *ingrad, float *outgrad, float *indata, int64_t N) { +__global__ void relu_gpu_backward(float* ingrad, float* outgrad, float* indata, int64_t N) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) ingrad[tid] = indata[tid] > 0 ? 
1 * outgrad[tid] : 0; @@ -61,40 +61,45 @@ MXReturnValue backwardGPU(const std::unordered_map& at std::vector* outputs, const OpResource& res) { float* out_grad = inputs->at(0).data(); - float* in_data = inputs->at(1).data(); - float* in_grad = outputs->at(0).data(); + float* in_data = inputs->at(1).data(); + float* in_grad = outputs->at(0).data(); mx_stream_t cuda_stream = res.get_cuda_stream(); - int64_t N = inputs->at(0).size(); - int num_block = (N + NumThreadPerBlock - 1) / NumThreadPerBlock; - relu_gpu_backward<<>>(in_grad, out_grad, in_data, N); + int64_t N = inputs->at(0).size(); + int num_block = (N + NumThreadPerBlock - 1) / NumThreadPerBlock; + relu_gpu_backward<<>>( + in_grad, out_grad, in_data, N); return MX_SUCCESS; } -__global__ void noisy_relu_gpu_forward(float *out, float *in, int64_t N, mx_gpu_rand_t* states, int step) { - // the launcher logic ensures tid less than NumGPURandomStates - int tid = blockIdx.x * blockDim.x + threadIdx.x; - // each thread generates unique sequence of random numbers - mx_gpu_rand_t thread_state = states[tid]; - // each thread works on number of calculation - int start = tid * step; - int end = start + step; - for (int i=start; i 0 ? in[i] + noise : 0; - } +__global__ void noisy_relu_gpu_forward(float* out, + float* in, + int64_t N, + mx_gpu_rand_t* states, + int step) { + // the launcher logic ensures tid less than NumGPURandomStates + int tid = blockIdx.x * blockDim.x + threadIdx.x; + // each thread generates unique sequence of random numbers + mx_gpu_rand_t thread_state = states[tid]; + // each thread works on number of calculation + int start = tid * step; + int end = start + step; + for (int i = start; i < end && i < N; ++i) { + float noise = curand_normal(&thread_state); + out[i] = in[i] + noise > 0 ? 
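// noisy_relu_gpu_forward splits random number generation the way the MXNet
// workflow comments below describe: each thread takes one persistent curand
// state from res.get_gpu_rand_states() and produces `step` consecutive
// outputs, so no two threads ever share a state. Worked sizing example,
// assuming the NumGPURandomStates cap referenced in the kernel comment:
//
//   N = 1'000'000, NumRandomPerThread = 64
//   nthread         = (N + 63) / 64 = 15625 threads wanted
//   num_thread_need = min(nthread, NumGPURandomStates)
//   step            = (N + num_thread_need - 1) / num_thread_need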
in[i] + noise : 0; + } } MXReturnValue noisyForwardGPU(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, const OpResource& res) { - float* in_data = inputs->at(0).data(); + float* in_data = inputs->at(0).data(); float* out_data = outputs->at(0).data(); mx_stream_t cuda_stream = res.get_cuda_stream(); - int64_t N = inputs->at(0).size(); + int64_t N = inputs->at(0).size(); // below is mxnet recommended workflow to parallel random number generating int nthread = (N + NumRandomPerThread - 1) / NumRandomPerThread; @@ -105,8 +110,8 @@ MXReturnValue noisyForwardGPU(const std::unordered_map // this can ensure number of parallel threads less than mxnet supported random number states int num_block = (num_thread_need + NumThreadPerBlock - 1) / NumThreadPerBlock; - noisy_relu_gpu_forward<<>>( - out_data, in_data, N, res.get_gpu_rand_states(), step); + noisy_relu_gpu_forward<<>>( + out_data, in_data, N, res.get_gpu_rand_states(), step); return MX_SUCCESS; } diff --git a/example/extensions/lib_custom_op/relu_lib.h b/example/extensions/lib_custom_op/relu_lib.h index 5aadfe930340..c0e250f340e5 100644 --- a/example/extensions/lib_custom_op/relu_lib.h +++ b/example/extensions/lib_custom_op/relu_lib.h @@ -31,38 +31,38 @@ using namespace mxnet::ext; -#define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block -#define NumRandomPerThread 64 // mxnet recommended random numbers generated per thread +#define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block +#define NumRandomPerThread 64 // mxnet recommended random numbers generated per thread class MyStatefulReluCPU : public CustomStatefulOp { - public: - explicit MyStatefulReluCPU(const std::unordered_map& attrs); + public: + explicit MyStatefulReluCPU(const std::unordered_map& attrs); - MXReturnValue Forward(std::vector* inputs, + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + MXReturnValue Backward(std::vector* inputs, std::vector* outputs, const OpResource& op_res); - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res); - private: - const std::unordered_map attrs_; + private: + const std::unordered_map attrs_; }; class MyStatefulReluGPU : public CustomStatefulOp { - public: - explicit MyStatefulReluGPU(const std::unordered_map& attrs); + public: + explicit MyStatefulReluGPU(const std::unordered_map& attrs); - MXReturnValue Forward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res); - - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res); - - private: - const std::unordered_map attrs_; + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + + MXReturnValue Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res); + + private: + const std::unordered_map attrs_; }; MXReturnValue forwardGPU(const std::unordered_map& attrs, diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index e8a8bb7a3ee1..97c0153b63d2 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -30,35 +30,35 @@ using namespace mxnet::ext; void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { - MXSparse* A = src.data(); - MXSparse* B = dst.data(); + MXSparse* A = src.data(); + MXSparse* B = dst.data(); std::vector shape = src.shape; - int64_t h = 
shape[0]; - int64_t w = shape[1]; - if(src.stype == kCSRStorage) { - float *Aval = (float*) (A->data); + int64_t h = shape[0]; + int64_t w = shape[1]; + if (src.stype == kCSRStorage) { + float* Aval = (float*)(A->data); // Here we need one more element to help calculate index(line 57). std::vector rowPtr(w + 2, 0); // count column - for(int i = 0; i < A->data_len; i++) { + for (int i = 0; i < A->data_len; i++) { rowPtr[A->indices[i] + 2]++; } - // Accumulated sum. After this for loop, rowPtr[1:w+2) stores the correct + // Accumulated sum. After this for loop, rowPtr[1:w+2) stores the correct // result of transposed rowPtr. - for(int i = 2; i < rowPtr.size(); i++) { + for (int i = 2; i < rowPtr.size(); i++) { rowPtr[i] += rowPtr[i - 1]; } - + // Alloc memory for sparse data, where 0 is the index // of B in output vector. res.alloc_sparse(B, 0, A->data_len, w + 1); - float *Bval = (float*) (B->data); - for(int i = 0; i < h; i++) { - for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { - // Helps calculate index and after that rowPtr[0:w+1) stores the + float* Bval = (float*)(B->data); + for (int i = 0; i < h; i++) { + for (int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { + // Helps calculate index and after that rowPtr[0:w+1) stores the // correct result of transposed rowPtr. - int index = rowPtr[A->indices[j] + 1]++; - Bval[index] = Aval[j]; + int index = rowPtr[A->indices[j] + 1]++; + Bval[index] = Aval[j]; B->indices[index] = i; } } @@ -70,10 +70,9 @@ MXReturnValue forward(const std::unordered_map& attrs, std::vector* inputs, std::vector* outputs, const OpResource& res) { - // The data types and storage types of inputs and outputs should be the same. - if(inputs->at(0).dtype != outputs->at(0).dtype || - inputs->at(0).stype != outputs->at(0).stype) { - MX_ERROR_MSG << "Error! Expected all inputs and outputs to be the same type." + // The data types and storage types of inputs and outputs should be the same. + if (inputs->at(0).dtype != outputs->at(0).dtype || inputs->at(0).stype != outputs->at(0).stype) { + MX_ERROR_MSG << "Error! Expected all inputs and outputs to be the same type." 
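// Worked example of the two-pass CSR transpose above, for the 2 x 3 matrix
// [[a, 0, b], [0, c, 0]] stored as data = {a, b, c}, indices = {0, 2, 1},
// indptr = {0, 2, 3}:
//
//   counting pass:    rowPtr = {0, 0, 1, 1, 1}    (rowPtr[col + 2]++)
//   accumulate pass:  rowPtr = {0, 0, 1, 2, 3}
//   scatter pass:     B.data = {a, c, b}, B.indices = {0, 1, 0},
//                     and rowPtr[0..w] becomes B's indptr {0, 1, 2, 3}
//
// Column j of A becomes row j of the 3 x 2 result with no sorting required.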
<< "Found input storage type:" << inputs->at(0).stype << " Found output storage type:" << outputs->at(0).stype << " Found input data type:" << inputs->at(0).dtype @@ -93,8 +92,9 @@ MXReturnValue backward(const std::unordered_map& attrs } MXReturnValue parseAttrs(const std::unordered_map& attrs, - int* num_in, int* num_out) { - *num_in = 1; + int* num_in, + int* num_out) { + *num_in = 1; *num_out = 1; return MX_SUCCESS; } @@ -142,42 +142,41 @@ MXReturnValue inferShape(const std::unordered_map& att } REGISTER_OP(my_transposecsr) -.setForward(forward, "cpu") -.setBackward(backward, "cpu") -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferSType(inferSType) -.setInferShape(inferShape); + .setForward(forward, "cpu") + .setBackward(backward, "cpu") + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferSType(inferSType) + .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ class MyStatefulTransposeCSR : public CustomStatefulOp { - public: - explicit MyStatefulTransposeCSR(int count, - std::unordered_map attrs) + public: + explicit MyStatefulTransposeCSR(int count, std::unordered_map attrs) : count(count), attrs_(std::move(attrs)) {} - MXReturnValue Forward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) override { - std::cout << "Info: keyword + number of forward: " << ++count << std::endl; - return forward(attrs_, inputs, outputs, op_res); - } + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) override { + std::cout << "Info: keyword + number of forward: " << ++count << std::endl; + return forward(attrs_, inputs, outputs, op_res); + } - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) override { - return backward(attrs_, inputs, outputs, op_res); - } + MXReturnValue Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) override { + return backward(attrs_, inputs, outputs, op_res); + } - private: - int count; - const std::unordered_map attrs_; + private: + int count; + const std::unordered_map attrs_; }; MXReturnValue createOpState(const std::unordered_map& attrs, const MXContext& ctx, - const std::vector >& in_shapes, + const std::vector>& in_shapes, const std::vector in_types, CustomStatefulOp** op_inst) { // testing passing of keyword arguments @@ -189,11 +188,11 @@ MXReturnValue createOpState(const std::unordered_map& } REGISTER_OP(my_state_transposecsr) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferSType(inferSType) -.setInferShape(inferShape) -.setCreateOpState(createOpState, "cpu"); + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferSType(inferSType) + .setInferShape(inferShape) + .setCreateOpState(createOpState, "cpu"); MXReturnValue initialize(int version) { if (version >= 10700) { diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index ffb43db16dbc..6255fab78801 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -31,26 +31,25 @@ using namespace mxnet::ext; void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { MXSparse* A = src.data(); - MXSparse* B = dst.data(); + MXSparse* B = dst.data(); std::vector shape = src.shape; - int64_t h = shape[0]; - int64_t w = shape[1]; - if(src.stype == kRowSparseStorage) { + int64_t h = shape[0]; + int64_t w = shape[1]; + if 
(src.stype == kRowSparseStorage) { // Keys of the map is the row index of transposed tensors. - // Values of the map is the rows which have non-zero elements. + // Values of the map is the rows which have non-zero elements. std::map> mp; - float *Aval = (float*) (A->data); - for(int i = 0; i < A->data_len; i++) { + float* Aval = (float*)(A->data); + for (int i = 0; i < A->data_len; i++) { int row = i / w; int col = i % w; - row = A->indices[row]; - if(Aval[i] != 0) { - if(mp.find(col) == mp.end()) { - mp[col] = std::vector(h, 0); + row = A->indices[row]; + if (Aval[i] != 0) { + if (mp.find(col) == mp.end()) { + mp[col] = std::vector(h, 0); mp[col][row] = Aval[i]; - } - else { + } else { mp[col][row] = Aval[i]; } } @@ -58,11 +57,11 @@ void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { // Alloc memory for output tensors. res.alloc_sparse(B, 0, mp.size()); - float *Bval = (float*) (B->data); + float* Bval = (float*)(B->data); int didx = 0, iidx = 0; - for(const auto& i : mp) { + for (const auto& i : mp) { B->indices[iidx++] = i.first; - for(auto j : i.second) { + for (auto j : i.second) { Bval[didx++] = j; } } @@ -74,8 +73,7 @@ MXReturnValue forward(const std::unordered_map& attrs, std::vector* outputs, const OpResource& res) { // The data types and storage types of inputs and outputs should be the same. - if(inputs->at(0).dtype != outputs->at(0).dtype || - inputs->at(0).stype != outputs->at(0).stype) { + if (inputs->at(0).dtype != outputs->at(0).dtype || inputs->at(0).stype != outputs->at(0).stype) { MX_ERROR_MSG << "Error! Expected all inputs and outputs to be the same type." << "Found input storage type:" << inputs->at(0).stype << " Found output storage type:" << outputs->at(0).stype @@ -95,8 +93,9 @@ MXReturnValue backward(const std::unordered_map& attrs } MXReturnValue parseAttrs(const std::unordered_map& attrs, - int* num_in, int* num_out) { - *num_in = 1; + int* num_in, + int* num_out) { + *num_in = 1; *num_out = 1; return MX_SUCCESS; } @@ -144,59 +143,58 @@ MXReturnValue inferShape(const std::unordered_map& att } REGISTER_OP(my_transposerowsp) -.setForward(forward, "cpu") -.setBackward(backward, "cpu") -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferSType(inferSType) -.setInferShape(inferShape); + .setForward(forward, "cpu") + .setBackward(backward, "cpu") + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferSType(inferSType) + .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ class MyStatefulTransposeRowSP : public CustomStatefulOp { - public: - explicit MyStatefulTransposeRowSP(int count, - std::unordered_map attrs) + public: + explicit MyStatefulTransposeRowSP(int count, std::unordered_map attrs) : count(count), attrs_(std::move(attrs)) {} - MXReturnValue Forward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) override { - std::cout << "Info: keyword + number of forward: " << ++count << std::endl; - return forward(attrs_, inputs, outputs, op_res); - } + MXReturnValue Forward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) override { + std::cout << "Info: keyword + number of forward: " << ++count << std::endl; + return forward(attrs_, inputs, outputs, op_res); + } - MXReturnValue Backward(std::vector* inputs, - std::vector* outputs, - const OpResource& op_res) override { - return backward(attrs_, inputs, outputs, op_res); - } + MXReturnValue Backward(std::vector* inputs, + std::vector* outputs, + const OpResource& op_res) 
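// Row-sparse storage, as handled by transpose() above, keeps only the rows
// that contain non-zeros: `indices` lists the stored row ids and `data`
// holds those rows densely. For w = 3, indices = {0, 2}, and
// data = {a, 0, b, 0, c, 0}, the logical 3 x 3 tensor is
//
//   row 0: [a, 0, b]
//   row 1: [0, 0, 0]   (not stored)
//   row 2: [0, c, 0]
//
// which is why the loop recovers element i's logical position as
// row = A->indices[i / w], col = i % w before bucketing values by column.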
override { + return backward(attrs_, inputs, outputs, op_res); + } - private: - int count; - const std::unordered_map attrs_; + private: + int count; + const std::unordered_map attrs_; }; MXReturnValue createOpState(const std::unordered_map& attrs, const MXContext& ctx, - const std::vector >& in_shapes, + const std::vector>& in_shapes, const std::vector in_types, CustomStatefulOp** op_inst) { // testing passing of keyword arguments int count = attrs.count("test_kw") > 0 ? std::stoi(attrs.at("test_kw")) : 0; // creating stateful operator instance - *op_inst = new MyStatefulTransposeRowSP(count, attrs); + *op_inst = new MyStatefulTransposeRowSP(count, attrs); (*op_inst)->ignore_warn = true; std::cout << "Info: stateful operator created" << std::endl; return MX_SUCCESS; } REGISTER_OP(my_state_transposerowsp) -.setParseAttrs(parseAttrs) -.setInferType(inferType) -.setInferSType(inferSType) -.setInferShape(inferShape) -.setCreateOpState(createOpState, "cpu"); + .setParseAttrs(parseAttrs) + .setInferType(inferType) + .setInferSType(inferSType) + .setInferShape(inferShape) + .setCreateOpState(createOpState, "cpu"); MXReturnValue initialize(int version) { if (version >= 10700) { diff --git a/example/extensions/lib_external_ops/min_ex-inl.h b/example/extensions/lib_external_ops/min_ex-inl.h index 79ce5d407890..a3f1d9e45932 100644 --- a/example/extensions/lib_external_ops/min_ex-inl.h +++ b/example/extensions/lib_external_ops/min_ex-inl.h @@ -36,31 +36,30 @@ namespace mxnet { namespace op { -template +template void MinExForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - //do nothing + // do nothing } - inline bool MinExOpShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_attrs, mxnet::ShapeVector* out_attrs) { - //do nothing - return true; + // do nothing + return true; } inline bool MinExOpType(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - //do nothing + std::vector* in_attrs, + std::vector* out_attrs) { + // do nothing return true; } -} // namespace op -} // namespace mxnet +} // namespace op +} // namespace mxnet #endif // MXNET_OPERATOR_TENSOR_MIN_EX_OP_INL_H_ diff --git a/example/extensions/lib_external_ops/min_ex.cc b/example/extensions/lib_external_ops/min_ex.cc index cb9f6dda8b1e..eb6d5bd6fda1 100644 --- a/example/extensions/lib_external_ops/min_ex.cc +++ b/example/extensions/lib_external_ops/min_ex.cc @@ -29,12 +29,12 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(min_ex) -.describe("some description") -.set_num_inputs(0) -.set_num_outputs(0) -.set_attr("FInferShape", MinExOpShape) -.set_attr("FInferType", MinExOpType) -.set_attr("FCompute", MinExForward); + .describe("some description") + .set_num_inputs(0) + .set_num_outputs(0) + .set_attr("FInferShape", MinExOpShape) + .set_attr("FInferType", MinExOpType) + .set_attr("FCompute", MinExForward); -} // namespace op -} // namespace mxnet +} // namespace op +} // namespace mxnet diff --git a/example/extensions/lib_external_ops/min_ex.cu b/example/extensions/lib_external_ops/min_ex.cu index 6257ea703ba3..c65d9b67ccf6 100644 --- a/example/extensions/lib_external_ops/min_ex.cu +++ b/example/extensions/lib_external_ops/min_ex.cu @@ -28,8 +28,7 @@ namespace mxnet { namespace op { -NNVM_REGISTER_OP(min_ex) -.set_attr("FCompute", MinExForward); +NNVM_REGISTER_OP(min_ex).set_attr("FCompute", MinExForward); } // namespace op } // namespace mxnet diff --git a/example/extensions/lib_pass/pass_lib.cc 
b/example/extensions/lib_pass/pass_lib.cc index fb9a2d42f8d3..2e90c9f0b3e7 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -31,7 +31,7 @@ using namespace mxnet::ext; /* \brief a basic pass that prints out the options and the graph */ -MXReturnValue myPass(mxnet::ext::Graph *g, +MXReturnValue myPass(mxnet::ext::Graph* g, const std::unordered_map& options) { for (auto kv : options) { std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; @@ -40,8 +40,7 @@ MXReturnValue myPass(mxnet::ext::Graph *g, return MX_SUCCESS; } -REGISTER_PASS(myPass) -.setBody(myPass); +REGISTER_PASS(myPass).setBody(myPass); MXReturnValue initialize(int version) { if (version >= 10700) { diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 9345b6a13ab4..96b2a1adf0d9 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -32,17 +32,17 @@ using namespace mxnet::ext; /* function to execute log operator on floats */ -void myLog(MXTensor *in, MXTensor *out) { - float* inp = in->data(); +void myLog(MXTensor* in, MXTensor* out) { + float* inp = in->data(); float* outp = out->data(); for (int64_t i = 0; i < in->size(); i++) { outp[i] = logf(inp[i]); } } /* function to execute exp operator on floats */ -void myExp(MXTensor *in, MXTensor *out) { - float* inp = in->data(); - float* outp =out->data(); +void myExp(MXTensor* in, MXTensor* out) { + float* inp = in->data(); + float* outp = out->data(); for (int64_t i = 0; i < in->size(); i++) { outp[i] = expf(inp[i]); } @@ -51,15 +51,15 @@ void myExp(MXTensor *in, MXTensor *out) { /* function to execute ops in subgraph * In MXNet, subgraphs are sorted in topological order * so all we need to do is go through the ops in order - * and execute each op. + * and execute each op. 
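 *
 * A sketch of that contract, in terms of the Graph/Node API used below:
 * topological order guarantees every input of a node is computed before
 * the node itself is visited, so a single pass suffices:
 *
 *   for each node n in subgraph (in order):
 *     if n->op == "null":  bind the next subgraph input tensor to n
 *     else:                read n->inputs[*].node->tensor, run the op,
 *                          and store the result in n->tensor for consumers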
*/ MXReturnValue myExecutor(std::vector* inputs, std::vector* outputs, - mxnet::ext::Graph *subgraph) { + mxnet::ext::Graph* subgraph) { std::cout << "Info: subgraph is: " << std::endl; subgraph->print(); - //counter for inputs + // counter for inputs int input_cnt = 0; // temporary tensor storage std::vector data; @@ -67,7 +67,7 @@ MXReturnValue myExecutor(std::vector* inputs, std::vector to_free; // loop over nodes - for(int i=0; isize(); i++) { + for (int i = 0; i < subgraph->size(); i++) { mxnet::ext::Node* node = subgraph->getNode(i); // handle each op type if (node->op.compare("null") == 0) { @@ -75,26 +75,36 @@ MXReturnValue myExecutor(std::vector* inputs, node->tensor = &inputs->at(input_cnt++); } else if (node->op.compare("log") == 0) { // get input tensor based on node ID inputs from data storage - MXTensor *input = node->inputs.at(0).node->tensor; + MXTensor* input = node->inputs.at(0).node->tensor; // create temporary storage - MXTensor tmp(malloc(input->size()*4), input->shape, input->dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT + MXTensor tmp(malloc(input->size() * 4), + input->shape, + input->dtype, + 0, + MXContext::CPU(0), + kDefaultStorage); // NOLINT // save allocated ptr to free later to_free.push_back(tmp.data_ptr); // execute log operator - myLog(input,&tmp); + myLog(input, &tmp); // add output tensor to data storage data.push_back(tmp); // set tensor for this node so we can read it later node->tensor = &data.back(); } else if (node->op.compare("exp") == 0) { // get input tensor based on node ID inputs from data storage - MXTensor *input = node->inputs.at(0).node->tensor; + MXTensor* input = node->inputs.at(0).node->tensor; // create temporary storage - MXTensor tmp(malloc(input->size()*4), input->shape, input->dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT + MXTensor tmp(malloc(input->size() * 4), + input->shape, + input->dtype, + 0, + MXContext::CPU(0), + kDefaultStorage); // NOLINT // save allocated ptr to free later to_free.push_back(tmp.data_ptr); - // execute exp operator - myExp(input,&tmp); + // execute exp operator + myExp(input, &tmp); // add output tensor to data storage data.push_back(tmp); // set tensor for this node so we can read it later @@ -107,15 +117,15 @@ MXReturnValue myExecutor(std::vector* inputs, return MX_FAIL; } } - + // copy all operator results to outputs of subgraph for (int j = 0; j < subgraph->outputs.size(); j++) { // get computed result - MXTensor *result = subgraph->outputs[j].node->tensor; + MXTensor* result = subgraph->outputs[j].node->tensor; // get output tensor to pass to MX - MXTensor &out = outputs->at(j); - float *out_data = out.data(); - float *res_data = result->data(); + MXTensor& out = outputs->at(j); + float* out_data = out.data(); + float* res_data = result->data(); // loop and copy data for (int64_t i = 0; i < result->size(); i++) { out_data[i] = res_data[i]; @@ -126,16 +136,15 @@ MXReturnValue myExecutor(std::vector* inputs, for (void* ptr : to_free) { free(ptr); // NOLINT } - + return MX_SUCCESS; } class MyStatefulOp : public CustomStatefulOp { public: - explicit MyStatefulOp(std::string json, - const std::unordered_map& attrs) - : attrs_(attrs) { - for (const auto &kv : attrs) { + explicit MyStatefulOp(std::string json, const std::unordered_map& attrs) + : attrs_(attrs) { + for (const auto& kv : attrs) { std::cout << "subgraphOp attributes: " << kv.first << " ==> " << kv.second << std::endl; } subgraph_ = mxnet::ext::Graph::fromString(json); @@ -144,14 +153,14 @@ class MyStatefulOp : public 
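// One detail of myExecutor() worth noting: the temporary buffers above are
// sized as input->size() * 4 because every tensor in these examples is
// float32 (4 bytes per element). A dtype-agnostic variant would scale by
// the element width instead; `bytes_per_element` below is a hypothetical
// helper, not part of the extension API:
//
//   void* buf = malloc(input->size() * bytes_per_element(input->dtype));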
CustomStatefulOp { MXReturnValue Forward(std::vector* inputs, std::vector* outputs, const OpResource& op_res) override { - if(attrs_.count(MX_STR_EXTRA_INPUTS) > 0 && std::stoi(attrs_.at(MX_STR_EXTRA_INPUTS)) > 0) + if (attrs_.count(MX_STR_EXTRA_INPUTS) > 0 && std::stoi(attrs_.at(MX_STR_EXTRA_INPUTS)) > 0) std::cout << "forward::extra_inputs(" << attrs_.at(MX_STR_EXTRA_INPUTS) << ")::inputs [" - << inputs->size() << "]" << std::endl; + << inputs->size() << "]" << std::endl; return myExecutor(inputs, outputs, subgraph_); } private: - mxnet::ext::Graph *subgraph_; + mxnet::ext::Graph* subgraph_; const std::unordered_map attrs_; }; @@ -172,11 +181,9 @@ MXReturnValue createOpState(const std::unordered_map& return MX_SUCCESS; } -REGISTER_OP(_custom_subgraph_op) -.setIsSubgraphOp() -.setCreateOpState(createOpState, "cpu"); +REGISTER_OP(_custom_subgraph_op).setIsSubgraphOp().setCreateOpState(createOpState, "cpu"); -const std::vector op_names({"exp","log"}); +const std::vector op_names({"exp", "log"}); MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, std::vector* ids, @@ -185,22 +192,22 @@ MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } - //loop over nodes - for(int i=0; isize(); i++) { - const mxnet::ext::Node *node = graph->getNode(i); + // loop over nodes + for (int i = 0; i < graph->size(); i++) { + const mxnet::ext::Node* node = graph->getNode(i); - //get shape/type if available + // get shape/type if available std::string shape; int dtype = -1; - if(node->attrs.count("shape") > 0) + if (node->attrs.count("shape") > 0) shape = node->attrs.at("shape"); - if(node->attrs.count("dtype") > 0) + if (node->attrs.count("dtype") > 0) dtype = std::stoi(node->attrs.at("dtype")); - //check if op dtype is float, and if option was specified to require float types - if((dtype == kFloat32 && options.count("reqFloat") > 0) || options.count("reqFloat") == 0) { - //check if op is in allowlist - if(std::find(op_names.begin(),op_names.end(),node->op.c_str()) != op_names.end()) { + // check if op dtype is float, and if option was specified to require float types + if ((dtype == kFloat32 && options.count("reqFloat") > 0) || options.count("reqFloat") == 0) { + // check if op is in allowlist + if (std::find(op_names.begin(), op_names.end(), node->op.c_str()) != op_names.end()) { // found op in allowlist, set value to -1 to include op in any subgraph ids->at(i) = -1; } @@ -209,7 +216,9 @@ MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, return MX_SUCCESS; } -MXReturnValue myReviewSubgraph(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, +MXReturnValue myReviewSubgraph(const mxnet::ext::Graph* subgraph, + int subgraph_id, + bool* accept, const std::unordered_map& options, std::unordered_map* attrs) { for (auto kv : options) { @@ -221,7 +230,7 @@ MXReturnValue myReviewSubgraph(const mxnet::ext::Graph *subgraph, int subgraph_i std::cout << sg << std::endl; // check if option `reject` was specified, and if so check if value is 'True' - if(options.count("reject") > 0 && options.at("reject").compare("True") == 0) { + if (options.count("reject") > 0 && options.at("reject").compare("True") == 0) { // if specified, reject the subgraph. 
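// The partitioning flow these callbacks implement: mySupportedOps() marks
// the node ids it can handle (-1 = free to join any subgraph), MXNet then
// groups the marked nodes into candidate subgraphs, and myReviewSubgraph()
// gets a final look at each candidate: it may veto it through *accept, and
// any attributes it adds (the "myKey"/"myVal" pair below) are handed to the
// subgraph op's attrs at execution time.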
this is only used for testing *accept = false; std::cout << "rejecting subgraph" << std::endl; @@ -230,43 +239,42 @@ MXReturnValue myReviewSubgraph(const mxnet::ext::Graph *subgraph, int subgraph_i std::cout << "accepting subgraph" << std::endl; } - attrs->emplace("myKey","myVal"); + attrs->emplace("myKey", "myVal"); return MX_SUCCESS; } REGISTER_PARTITIONER(myProp) -.addStrategy("strategy1", "_custom_subgraph_op") -.setSupportedOps("strategy1", mySupportedOps) -.setReviewSubgraph("strategy1", myReviewSubgraph); + .addStrategy("strategy1", "_custom_subgraph_op") + .setSupportedOps("strategy1", mySupportedOps) + .setReviewSubgraph("strategy1", myReviewSubgraph); class MySelector : public CustomOpSelector { public: - MySelector(const mxnet::ext::Graph *graph, - const std::unordered_map& options) : - graph_(graph), options_(options) { + MySelector(const mxnet::ext::Graph* graph, + const std::unordered_map& options) + : graph_(graph), options_(options) { for (auto kv : options) { - std::cout << "selector options: " << kv.first - << " ==> " << kv.second << std::endl; + std::cout << "selector options: " << kv.first << " ==> " << kv.second << std::endl; } } bool chooseNode(int nodeID) { - const mxnet::ext::Node *node = graph_->getNode(nodeID); + const mxnet::ext::Node* node = graph_->getNode(nodeID); - //get shape/type if available + // get shape/type if available std::string shape; int dtype = -1; - if(node->attrs.count("shape") > 0) + if (node->attrs.count("shape") > 0) shape = node->attrs.at("shape"); - if(node->attrs.count("dtype") > 0) + if (node->attrs.count("dtype") > 0) dtype = std::stoi(node->attrs.at("dtype")); - //check if op dtype is float, and if option was specified to require float types - if((dtype == kFloat32 && options_.count("reqFloat") > 0) || options_.count("reqFloat") == 0) { - //check if op is in allowlist - if(std::find(op_names.begin(),op_names.end(),node->op.c_str()) != op_names.end()) { + // check if op dtype is float, and if option was specified to require float types + if ((dtype == kFloat32 && options_.count("reqFloat") > 0) || options_.count("reqFloat") == 0) { + // check if op is in allowlist + if (std::find(op_names.begin(), op_names.end(), node->op.c_str()) != op_names.end()) { // found op in allowlist, return true to include op subgraph - return true; + return true; } } return false; @@ -280,17 +288,18 @@ class MySelector : public CustomOpSelector { bool SelectOutput(int nodeID, int output_nodeID) override { return chooseNode(output_nodeID); } - virtual void Filter(std::vector& candidates, - std::vector& keep) { + virtual void Filter(std::vector& candidates, std::vector& keep) { keep.insert(keep.end(), candidates.begin(), candidates.end()); } void Reset() override {} + private: - const mxnet::ext::Graph *graph_; + const mxnet::ext::Graph* graph_; const std::unordered_map options_; }; -MXReturnValue createSelector(const mxnet::ext::Graph *graph, CustomOpSelector** sel_inst, +MXReturnValue createSelector(const mxnet::ext::Graph* graph, + CustomOpSelector** sel_inst, const std::unordered_map& options) { *sel_inst = new MySelector(graph, options); std::cout << "Info: selector created" << std::endl; @@ -298,39 +307,37 @@ MXReturnValue createSelector(const mxnet::ext::Graph *graph, CustomOpSelector** } REGISTER_PARTITIONER(mySelect) -.addStrategy("strategy1", "_custom_subgraph_op") -.setCreateSelector("strategy1", createSelector) -.setReviewSubgraph("strategy1", myReviewSubgraph); + .addStrategy("strategy1", "_custom_subgraph_op") + .setCreateSelector("strategy1", 
createSelector) + .setReviewSubgraph("strategy1", myReviewSubgraph); /* \brief a basic pass that adds a new input for subgraph ops */ -MXReturnValue addInputPass(mxnet::ext::Graph *graph, - const std::unordered_map& options) { - //find node with '_custom_subgraph_op' op type - for(int i=0; isize(); i++) { +MXReturnValue addInputPass(mxnet::ext::Graph* graph, + const std::unordered_map& options) { + // find node with '_custom_subgraph_op' op type + for (int i = 0; i < graph->size(); i++) { mxnet::ext::Node* n = graph->getNode(i); - if(n->op.compare("_custom_subgraph_op") == 0) { - //set extra input + if (n->op.compare("_custom_subgraph_op") == 0) { + // set extra input n->attrs[MX_STR_EXTRA_INPUTS] = std::to_string(1); - - //create a new input Node + + // create a new input Node Node* input = graph->addNode(n->name + "_input", "null"); - //set this node as an input in the graph + // set this node as an input in the graph graph->inputs.push_back(input); - //connect new input to node - input->outputs.push_back({n,(int)(n->inputs.size())}); - //connect node to new input - n->inputs.push_back({input,0}); + // connect new input to node + input->outputs.push_back({n, (int)(n->inputs.size())}); + // connect node to new input + n->inputs.push_back({input, 0}); // add a corresponding tensor for this input - input->alloc_arg({1},MXContext::CPU(0),kFloat32); + input->alloc_arg({1}, MXContext::CPU(0), kFloat32); } } return MX_SUCCESS; } -REGISTER_PASS(addInputPass) -.setBody(addInputPass); - +REGISTER_PASS(addInputPass).setBody(addInputPass); MXReturnValue initialize(int version) { if (version >= 10700) { From 00e8dfb9102c340b8990313ccf3c86ca8c1f86be Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:01:56 +0100 Subject: [PATCH 06/10] [PLUGIN] Re-format .cc .h files --- plugin/opencv/cv_api.cc | 150 +++++++++++++++------------- plugin/opencv/cv_api.h | 37 ++++--- plugin/sframe/iter_sframe.cc | 98 +++++++++---------- plugin/torch/torch_base.cc | 8 +- plugin/torch/torch_base.h | 53 +++++----- plugin/torch/torch_criterion-inl.h | 85 ++++++++-------- plugin/torch/torch_criterion.cc | 12 +-- plugin/torch/torch_criterion.cu | 6 +- plugin/torch/torch_function.cc | 55 +++++------ plugin/torch/torch_function.h | 136 ++++++++++++++------------ plugin/torch/torch_module-inl.h | 136 +++++++++++++------------- plugin/torch/torch_module.cc | 12 +-- plugin/torch/torch_module.cu | 6 +- plugin/warpctc/warpctc-inl.h | 151 +++++++++++++---------------- plugin/warpctc/warpctc.cc | 16 +-- plugin/warpctc/warpctc.cu | 6 +- 16 files changed, 487 insertions(+), 480 deletions(-) diff --git a/plugin/opencv/cv_api.cc b/plugin/opencv/cv_api.cc index b0915fd40579..bbbe96c3226b 100644 --- a/plugin/opencv/cv_api.cc +++ b/plugin/opencv/cv_api.cc @@ -30,35 +30,37 @@ #include "cv_api.h" #include "../../src/c_api/c_api_common.h" - using namespace mxnet; // http://www.64lines.com/jpeg-width-height -// Gets the JPEG size from the array of data passed to the function, file reference: http://www.obrador.com/essentialjpeg/headerinfo.htm -bool get_jpeg_size(const unsigned char* data, mx_uint data_size, mx_uint *width, mx_uint *height) { +// Gets the JPEG size from the array of data passed to the function, file reference: +// http://www.obrador.com/essentialjpeg/headerinfo.htm +bool get_jpeg_size(const unsigned char* data, mx_uint data_size, mx_uint* width, mx_uint* height) { // Check for valid JPEG image mx_uint i = 0; // Keeps track of the position within the file - if (data[i] == 0xFF && data[i+1] == 0xD8 && data[i+2] == 
0xFF && data[i+3] == 0xE0) { + if (data[i] == 0xFF && data[i + 1] == 0xD8 && data[i + 2] == 0xFF && data[i + 3] == 0xE0) { i += 4; // Check for valid JPEG header (null terminated JFIF) - if (data[i+2] == 'J' && data[i+3] == 'F' && data[i+4] == 'I' - && data[i+5] == 'F' && data[i+6] == 0x00) { + if (data[i + 2] == 'J' && data[i + 3] == 'F' && data[i + 4] == 'I' && data[i + 5] == 'F' && + data[i + 6] == 0x00) { // Retrieve the block length of the first block since // the first block will not contain the size of file - uint16_t block_length = data[i] * 256 + data[i+1]; + uint16_t block_length = data[i] * 256 + data[i + 1]; while (i < data_size) { - i+=block_length; // Increase the file index to get to the next block - if (i >= data_size) return false; // Check to protect against segmentation faults - if (data[i] != 0xFF) return false; // Check that we are truly at the start of another block - if (data[i+1] == 0xC0) { + i += block_length; // Increase the file index to get to the next block + if (i >= data_size) + return false; // Check to protect against segmentation faults + if (data[i] != 0xFF) + return false; // Check that we are truly at the start of another block + if (data[i + 1] == 0xC0) { // 0xFFC0 is the "Start of frame" marker which contains the file size // The structure of the 0xFFC0 block is quite simple // [0xFFC0][ushort length][uchar precision][ushort x][ushort y] - *height = data[i+5]*256 + data[i+6]; - *width = data[i+7]*256 + data[i+8]; + *height = data[i + 5] * 256 + data[i + 6]; + *width = data[i + 7] * 256 + data[i + 8]; return true; } else { - i+=2; // Skip the block marker - block_length = data[i] * 256 + data[i+1]; // Go to the next block + i += 2; // Skip the block marker + block_length = data[i] * 256 + data[i + 1]; // Go to the next block } } return false; // If this point is reached then no size was found @@ -70,53 +72,61 @@ bool get_jpeg_size(const unsigned char* data, mx_uint data_size, mx_uint *width, } } -bool get_png_size(const unsigned char* data, mx_uint data_size, mx_uint *width, mx_uint *height) { - if (data[0] == 0x89 && data[1] == 0x50 && data[2] ==0x4E && data[3] == 0x47) { +bool get_png_size(const unsigned char* data, mx_uint data_size, mx_uint* width, mx_uint* height) { + if (data[0] == 0x89 && data[1] == 0x50 && data[2] == 0x4E && data[3] == 0x47) { unsigned char const* p = data + 16; - *width = ((p[0]*256 + p[1])*256 + p[2])*256 + p[3]; + *width = ((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]; p += 4; - *height = ((p[0]*256 + p[1])*256 + p[2])*256 + p[3]; + *height = ((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]; return true; } else { return false; } } -MXNET_DLL int MXCVImdecode(const unsigned char *img, const mx_uint len, - const int flag, NDArrayHandle *out) { +MXNET_DLL int MXCVImdecode(const unsigned char* img, + const mx_uint len, + const int flag, + NDArrayHandle* out) { API_BEGIN(); mx_uint dims[3]; CHECK_GE(flag, 0) << "flag must be 0 (grayscale) or 1 (colored)."; dims[2] = flag == 0 ? 
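// get_png_size() above relies on the fixed PNG layout: an 8-byte signature
// followed by the IHDR chunk's 4-byte length and 4-byte type, so the
// big-endian width starts at byte 16 and the height at byte 20, exactly
// the two 4-byte reads made from `data + 16`. The same big-endian decode,
// factored out as a sketch:
//
//   static mx_uint read_be32(const unsigned char* p) {
//     return ((p[0] * 256u + p[1]) * 256u + p[2]) * 256u + p[3];
//   }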
1 : 3; - if (get_jpeg_size(img, len, dims+1, dims)) { - } else if (get_png_size(img, len, dims+1, dims)) { + if (get_jpeg_size(img, len, dims + 1, dims)) { + } else if (get_png_size(img, len, dims + 1, dims)) { } else { LOG(FATAL) << "Only supports png and jpg."; } - NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); - unsigned char *img_cpy = new unsigned char[len]; - memcpy(img_cpy, img, sizeof(unsigned char)*len); - Engine::Get()->PushSync([=](RunContext ctx){ - ndout.CheckAndAlloc(); - cv::Mat buf(1, len, CV_8U, img_cpy); - cv::Mat dst(dims[0], dims[1], flag == 0 ? CV_8U : CV_8UC3, ndout.data().dptr_); + NDArray ndout(mxnet::TShape(dims, dims + 3), Context::CPU(), true, mshadow::kUint8); + unsigned char* img_cpy = new unsigned char[len]; + memcpy(img_cpy, img, sizeof(unsigned char) * len); + Engine::Get()->PushSync( + [=](RunContext ctx) { + ndout.CheckAndAlloc(); + cv::Mat buf(1, len, CV_8U, img_cpy); + cv::Mat dst(dims[0], dims[1], flag == 0 ? CV_8U : CV_8UC3, ndout.data().dptr_); #if (CV_MAJOR_VERSION > 3 || (CV_MAJOR_VERSION == 3 && CV_MINOR_VERSION >= 3)) - cv::imdecode(buf, flag | cv::IMREAD_IGNORE_ORIENTATION, &dst); + cv::imdecode(buf, flag | cv::IMREAD_IGNORE_ORIENTATION, &dst); #else - cv::imdecode(buf, flag, &dst); + cv::imdecode(buf, flag, &dst); #endif - CHECK(!dst.empty()); - delete[] img_cpy; - }, ndout.ctx(), {}, {ndout.var()}); - NDArray *tmp = new NDArray(); - *tmp = ndout; - *out = tmp; + CHECK(!dst.empty()); + delete[] img_cpy; + }, + ndout.ctx(), + {}, + {ndout.var()}); + NDArray* tmp = new NDArray(); + *tmp = ndout; + *out = tmp; API_END(); } - -MXNET_DLL int MXCVResize(NDArrayHandle src, const mx_uint w, const mx_uint h, - const int interpolation, NDArrayHandle *out) { +MXNET_DLL int MXCVResize(NDArrayHandle src, + const mx_uint w, + const mx_uint h, + const int interpolation, + NDArrayHandle* out) { API_BEGIN(); NDArray ndsrc = *static_cast(src); CHECK_EQ(ndsrc.shape().ndim(), 3); @@ -124,19 +134,23 @@ MXNET_DLL int MXCVResize(NDArrayHandle src, const mx_uint w, const mx_uint h, CHECK_EQ(ndsrc.dtype(), mshadow::kUint8); mx_uint dims[3] = {h, w, ndsrc.shape()[2]}; - NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); + NDArray ndout(mxnet::TShape(dims, dims + 3), Context::CPU(), true, mshadow::kUint8); - Engine::Get()->PushSync([=](RunContext ctx){ - ndout.CheckAndAlloc(); - cv::Mat buf(ndsrc.shape()[0], ndsrc.shape()[1], - dims[2] == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_); - cv::Mat dst(h, w, dims[2] == 3 ? CV_8UC3 : CV_8U, ndout.data().dptr_); - cv::resize(buf, dst, cv::Size(w, h), 0, 0, interpolation); - CHECK(!dst.empty()); - }, ndout.ctx(), {ndsrc.var()}, {ndout.var()}); - NDArray *tmp = new NDArray(); - *tmp = ndout; - *out = tmp; + Engine::Get()->PushSync( + [=](RunContext ctx) { + ndout.CheckAndAlloc(); + cv::Mat buf( + ndsrc.shape()[0], ndsrc.shape()[1], dims[2] == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_); + cv::Mat dst(h, w, dims[2] == 3 ? 
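// Every entry point in cv_api.cc follows the same dependency-engine
// pattern: the OpenCV work is wrapped in a lambda and handed to
// Engine::PushSync() together with the NDArray variables the lambda reads
// and the ones it mutates, so the engine can order it against other
// pending operations on the same arrays:
//
//   Engine::Get()->PushSync(
//       [=](RunContext ctx) { /* fill ndout, possibly reading ndsrc */ },
//       ndout.ctx(),
//       {ndsrc.var()},   // read dependencies (empty for MXCVImdecode)
//       {ndout.var()});  // write dependencies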
CV_8UC3 : CV_8U, ndout.data().dptr_); + cv::resize(buf, dst, cv::Size(w, h), 0, 0, interpolation); + CHECK(!dst.empty()); + }, + ndout.ctx(), + {ndsrc.var()}, + {ndout.var()}); + NDArray* tmp = new NDArray(); + *tmp = ndout; + *out = tmp; API_END(); } @@ -147,7 +161,7 @@ MXNET_DLL int MXCVcopyMakeBorder(NDArrayHandle src, const int right, const int type, const double value, - NDArrayHandle *out) { + NDArrayHandle* out) { API_BEGIN(); NDArray ndsrc = *static_cast(src); CHECK_EQ(ndsrc.shape().ndim(), 3); @@ -155,18 +169,22 @@ MXNET_DLL int MXCVcopyMakeBorder(NDArrayHandle src, CHECK_EQ(ndsrc.dtype(), mshadow::kUint8); int h = ndsrc.shape()[0], w = ndsrc.shape()[1], c = ndsrc.shape()[2]; - mx_uint dims[3] = {top+h+bot, left+w+right, c}; - NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); + mx_uint dims[3] = {top + h + bot, left + w + right, c}; + NDArray ndout(mxnet::TShape(dims, dims + 3), Context::CPU(), true, mshadow::kUint8); - Engine::Get()->PushSync([=](RunContext ctx){ - ndout.CheckAndAlloc(); - cv::Mat buf(h, w, c == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_); - cv::Mat dst(top+h+bot, left+w+right, c == 3 ? CV_8UC3 : CV_8U, ndout.data().dptr_); - cv::copyMakeBorder(buf, dst, top, bot, left, right, type, cv::Scalar(value)); - CHECK(!dst.empty()); - }, ndout.ctx(), {ndsrc.var()}, {ndout.var()}); - NDArray *tmp = new NDArray(); - *tmp = ndout; - *out = tmp; + Engine::Get()->PushSync( + [=](RunContext ctx) { + ndout.CheckAndAlloc(); + cv::Mat buf(h, w, c == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_); + cv::Mat dst(top + h + bot, left + w + right, c == 3 ? CV_8UC3 : CV_8U, ndout.data().dptr_); + cv::copyMakeBorder(buf, dst, top, bot, left, right, type, cv::Scalar(value)); + CHECK(!dst.empty()); + }, + ndout.ctx(), + {ndsrc.var()}, + {ndout.var()}); + NDArray* tmp = new NDArray(); + *tmp = ndout; + *out = tmp; API_END(); } diff --git a/plugin/opencv/cv_api.h b/plugin/opencv/cv_api.h index b318041eb6b9..c8ab701e0521 100644 --- a/plugin/opencv/cv_api.h +++ b/plugin/opencv/cv_api.h @@ -28,27 +28,24 @@ #include -MXNET_DLL int MXCVImdecode( - const unsigned char *img, - const mx_uint len, - const int flag, - NDArrayHandle *out); +MXNET_DLL int MXCVImdecode(const unsigned char* img, + const mx_uint len, + const int flag, + NDArrayHandle* out); -MXNET_DLL int MXCVResize( - NDArrayHandle src, - const mx_uint w, - const mx_uint h, - const int interpolation, - NDArrayHandle *out); +MXNET_DLL int MXCVResize(NDArrayHandle src, + const mx_uint w, + const mx_uint h, + const int interpolation, + NDArrayHandle* out); -MXNET_DLL int MXCVcopyMakeBorder( - NDArrayHandle src, - const int top, - const int bot, - const int left, - const int right, - const int type, - const double value, - NDArrayHandle *out); +MXNET_DLL int MXCVcopyMakeBorder(NDArrayHandle src, + const int top, + const int bot, + const int left, + const int right, + const int type, + const double value, + NDArrayHandle* out); #endif // PLUGIN_OPENCV_CV_API_H_ diff --git a/plugin/sframe/iter_sframe.cc b/plugin/sframe/iter_sframe.cc index 6a6b03f9c2fb..8834e8872ab4 100644 --- a/plugin/sframe/iter_sframe.cc +++ b/plugin/sframe/iter_sframe.cc @@ -22,7 +22,7 @@ * \file iter_sframe_image.cc * \brief * \author Bing Xu -*/ + */ #include #include @@ -53,16 +53,17 @@ struct SFrameParam : public dmlc::Parameter { mxnet::TShape data_shape; mxnet::TShape label_shape; DMLC_DECLARE_PARAMETER(SFrameParam) { - DMLC_DECLARE_FIELD(path_sframe).set_default("") - .describe("Dataset Param: path to image dataset sframe"); - 
DMLC_DECLARE_FIELD(data_field).set_default("data")
-    .describe("Dataset Param: data column in sframe");
-    DMLC_DECLARE_FIELD(label_field).set_default("label")
-    .describe("Dataset Param: label column in sframe");
-    DMLC_DECLARE_FIELD(data_shape)
-    .describe("Dataset Param: input data instance shape");
-    DMLC_DECLARE_FIELD(label_shape)
-    .describe("Dataset Param: input label instance shape");
+    DMLC_DECLARE_FIELD(path_sframe)
+        .set_default("")
+        .describe("Dataset Param: path to image dataset sframe");
+    DMLC_DECLARE_FIELD(data_field)
+        .set_default("data")
+        .describe("Dataset Param: data column in sframe");
+    DMLC_DECLARE_FIELD(label_field)
+        .set_default("label")
+        .describe("Dataset Param: label column in sframe");
+    DMLC_DECLARE_FIELD(data_shape).describe("Dataset Param: input data instance shape");
+    DMLC_DECLARE_FIELD(label_shape).describe("Dataset Param: input label instance shape");
   }
 };  // struct SFrameImageParam
@@ -80,12 +81,12 @@ class SFrameIterBase : public IIterator<DataInst> {
   virtual ~SFrameIterBase() {}
 
   virtual void BeforeFirst() {
-    idx_ = 0;
-    *range_it_ = sframe_.range_iterator();
+    idx_        = 0;
+    *range_it_  = sframe_.range_iterator();
     current_it_ = range_it_->begin();
   }
 
-  virtual const DataInst &Value(void) const {
+  virtual const DataInst& Value(void) const {
     return out_;
   }
@@ -109,8 +110,8 @@ class SFrameIterBase : public IIterator<DataInst> {
  protected:
   /*! \brief copy data */
-  template<int dim>
-  void Copy_(mshadow::Tensor<cpu, dim> tensor, const graphlab::flex_vec &vec) {
+  template <int dim>
+  void Copy_(mshadow::Tensor<cpu, dim> tensor, const graphlab::flex_vec& vec) {
     CHECK_EQ(tensor.shape_.Size(), vec.size());
     CHECK_EQ(tensor.CheckContiguous(), true);
     mshadow::Tensor<cpu, 1> flatten(tensor.dptr_, mshadow::Shape1(tensor.shape_.Size()));
@@ -122,14 +123,12 @@ class SFrameIterBase : public IIterator<DataInst> {
 
 class SFrameImageIter : public SFrameIterBase {
  public:
-  SFrameImageIter() :
-    augmenter_(new ImageAugmenter()), prnd_(new common::RANDOM_ENGINE(8964)) {}
+  SFrameImageIter() : augmenter_(new ImageAugmenter()), prnd_(new common::RANDOM_ENGINE(8964)) {}
 
   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
     Parent::Init(kwargs);
     augmenter_->Init(kwargs);
-    CHECK_EQ(Parent::param_.data_shape.ndim(), 3)
-      << "Image shape must be (channel, height, width)";
+    CHECK_EQ(Parent::param_.data_shape.ndim(), 3) << "Image shape must be (channel, height, width)";
   }
 
   bool Next(void) override {
@@ -141,21 +140,22 @@ class SFrameImageIter : public SFrameIterBase {
     // TODO(bing): check not decoded
     // TODO(bing): check img shape
     CHECK_EQ(gl_label.size(), Parent::param_.label_shape.Size()) << "Label shape does not match";
-    const unsigned char *raw_data = gl_img.get_image_data();
+    const unsigned char* raw_data = gl_img.get_image_data();
     cv::Mat res;
     cv::Mat buf(1, gl_img.m_image_data_size, CV_8U, const_cast<unsigned char*>(raw_data));
-    res = cv::imdecode(buf, -1);
-    res = augmenter_->Process(res, prnd_.get());
+    res                  = cv::imdecode(buf, -1);
+    res                  = augmenter_->Process(res, prnd_.get());
     const int n_channels = res.channels();
     if (!tmp_.Size()) {
-      tmp_.Push(Parent::idx_++,
-                Parent::param_.data_shape.get<3>(),
-                Parent::param_.label_shape.get<1>());
+      tmp_.Push(
+          Parent::idx_++, Parent::param_.data_shape.get<3>(), Parent::param_.label_shape.get<1>());
     }
     mshadow::Tensor<cpu, 3> data = Parent::tmp_.data().Back();
     std::vector<int> swap_indices;
-    if (n_channels == 1) swap_indices = {0};
-    if (n_channels == 3) swap_indices = {2, 1, 0};
+    if (n_channels == 1)
+      swap_indices = {0};
+    if (n_channels == 3)
+      swap_indices = {2, 1, 0};
     for (int i = 0; i < res.rows; ++i) {
       uchar* im_data = 
res.ptr(i); for (int j = 0; j < res.cols; ++j) { @@ -188,14 +188,13 @@ class SFrameDataIter : public SFrameIterBase { if (Parent::current_it_ == Parent::range_it_->end()) { return false; } - graphlab::flex_vec gl_data = (*Parent::current_it_)[0]; + graphlab::flex_vec gl_data = (*Parent::current_it_)[0]; graphlab::flex_vec gl_label = (*Parent::current_it_)[1]; CHECK_EQ(gl_data.size(), Parent::param_.data_shape.Size()) << "Data shape does not match"; CHECK_EQ(gl_label.size(), Parent::param_.label_shape.Size()) << "Label shape does not match"; if (!Parent::tmp_.Size()) { - Parent::tmp_.Push(Parent::idx_++, - Parent::param_.data_shape.get<3>(), - Parent::param_.label_shape.get<1>()); + Parent::tmp_.Push( + Parent::idx_++, Parent::param_.data_shape.get<3>(), Parent::param_.label_shape.get<1>()); } mshadow::Tensor data = Parent::tmp_.data().Back(); Parent::Copy_<3>(data, gl_data); @@ -214,31 +213,22 @@ class SFrameDataIter : public SFrameIterBase { DMLC_REGISTER_PARAMETER(SFrameParam); MXNET_REGISTER_IO_ITER(SFrameImageIter) -.describe("Naive SFrame image iterator prototype") -.add_arguments(SFrameParam::__FIELDS__()) -.add_arguments(BatchParam::__FIELDS__()) -.add_arguments(PrefetcherParam::__FIELDS__()) -.add_arguments(ImageAugmentParam::__FIELDS__()) -.add_arguments(ImageNormalizeParam::__FIELDS__()) -.set_body([]() { - return new PrefetcherIter( - new BatchLoader( - new ImageNormalizeIter( - new SFrameImageIter()))); + .describe("Naive SFrame image iterator prototype") + .add_arguments(SFrameParam::__FIELDS__()) + .add_arguments(BatchParam::__FIELDS__()) + .add_arguments(PrefetcherParam::__FIELDS__()) + .add_arguments(ImageAugmentParam::__FIELDS__()) + .add_arguments(ImageNormalizeParam::__FIELDS__()) + .set_body([]() { + return new PrefetcherIter(new BatchLoader(new ImageNormalizeIter(new SFrameImageIter()))); }); MXNET_REGISTER_IO_ITER(SFrameDataIter) -.describe("Naive SFrame data iterator prototype") -.add_arguments(SFrameParam::__FIELDS__()) -.add_arguments(BatchParam::__FIELDS__()) -.add_arguments(PrefetcherParam::__FIELDS__()) -.set_body([]() { - return new PrefetcherIter( - new BatchLoader( - new SFrameDataIter())); - }); - + .describe("Naive SFrame data iterator prototype") + .add_arguments(SFrameParam::__FIELDS__()) + .add_arguments(BatchParam::__FIELDS__()) + .add_arguments(PrefetcherParam::__FIELDS__()) + .set_body([]() { return new PrefetcherIter(new BatchLoader(new SFrameDataIter())); }); } // namespace io } // namespace mxnet - diff --git a/plugin/torch/torch_base.cc b/plugin/torch/torch_base.cc index 8a9d85b06465..0dcb73d29a07 100644 --- a/plugin/torch/torch_base.cc +++ b/plugin/torch/torch_base.cc @@ -22,7 +22,7 @@ * \file torch_base.cc * \brief torch_state * \author Junyuan Xie -*/ + */ #include "./torch_base.h" namespace mxnet { @@ -40,7 +40,7 @@ TorchState::TorchState() { "require 'cudnn'\n" #endif // MXNET_USE_CUDNN #endif // MXNET_USE_CUDA - ); // NOLINT(*) + ); // NOLINT(*) int err = lua_pcall(L, 0, 0, 0); CHECK_EQ(err, 0) << lua_tostring(L, -1); } @@ -53,13 +53,13 @@ TorchState* TorchState::ThreadSharedLuaState() { return state; } -template<> +template <> void TorchState::SetStream(mshadow::Stream* s) { return; } #if MXNET_USE_CUDA -template<> +template <> void TorchState::SetStream(mshadow::Stream* s) { CudaState()->currentStream = mshadow::Stream::GetStream(s); } diff --git a/plugin/torch/torch_base.h b/plugin/torch/torch_base.h index 04bee24974bf..9c573daa70e7 100644 --- a/plugin/torch/torch_base.h +++ b/plugin/torch/torch_base.h @@ -65,26 +65,26 @@ class TorchState 
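// The set_body() lambdas above compose iterators like decorators: the
// innermost source yields one DataInst at a time and every wrapper adds a
// stage, so the registered image pipeline
//
//   new PrefetcherIter(              // runs the chain on a background thread
//       new BatchLoader(             // packs instances into batches
//           new ImageNormalizeIter(  // per-image mean/scale adjustment
//               new SFrameImageIter())))   // raw SFrame source
//
// reads: decode, normalize, batch, prefetch.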
{ } #endif // MXNET_USE_CUDA - template + template void SetStream(mshadow::Stream* s); void PrintState() { int i; int top = lua_gettop(L); LOG(INFO) << "Stack height: " << top; - for (i = 1; i <= top; i++) { /* repeat for each level */ + for (i = 1; i <= top; i++) { /* repeat for each level */ int t = lua_type(L, i); switch (t) { - case LUA_TSTRING: /* strings */ + case LUA_TSTRING: /* strings */ LOG(INFO) << i << ": '" << lua_tostring(L, i) << "'"; break; - case LUA_TBOOLEAN: /* booleans */ + case LUA_TBOOLEAN: /* booleans */ LOG(INFO) << i << ": " << (lua_toboolean(L, i) ? "true" : "false"); break; - case LUA_TNUMBER: /* numbers */ + case LUA_TNUMBER: /* numbers */ LOG(INFO) << i << ": " << lua_tonumber(L, i); break; - default: /* other values */ + default: /* other values */ LOG(INFO) << i << ": " << lua_typename(L, t); break; } @@ -152,7 +152,7 @@ class TorchTensor { } static THGeneralTensor TBlobToTHTensor(TorchState* torchState, TBlob data) { - size_t size = data.Size(); + size_t size = data.Size(); THGeneralTensor tensor = NULL; THLongStorage* thshape = THLongStorage_newWithSize(data.ndim()); for (int i = 0; i < data.ndim(); ++i) { @@ -161,8 +161,8 @@ class TorchTensor { CHECK_EQ(data.type_flag_, mshadow::kFloat32) << "Torch Interface only support float32"; switch (data.dev_mask()) { case cpu::kDevMask: { - THFloatStorage* storage = THFloatStorage_newWithData(static_cast(data.dptr_), - size); + THFloatStorage* storage = + THFloatStorage_newWithData(static_cast(data.dptr_), size); THFloatStorage_clearFlag(storage, TH_STORAGE_FREEMEM); tensor = (THGeneralTensor)THFloatTensor_newWithStorage(storage, 0, thshape, NULL); THFloatStorage_free(storage); @@ -171,8 +171,8 @@ class TorchTensor { #if MXNET_USE_CUDA case gpu::kDevMask: { THCState* state = torchState->CudaState(); - THCudaStorage* storage = THCudaStorage_newWithData(state, static_cast(data.dptr_), - size); + THCudaStorage* storage = + THCudaStorage_newWithData(state, static_cast(data.dptr_), size); // a bug in cutorch THFloatStorage_clearFlag(reinterpret_cast(storage), TH_STORAGE_FREEMEM); tensor = (THGeneralTensor)THCudaTensor_newWithStorage(state, storage, 0, thshape, NULL); @@ -197,7 +197,7 @@ class TorchTensor { } #if MXNET_USE_CUDA case gpu::kDevMask: { - THCState* state = torchState->CudaState(); + THCState* state = torchState->CudaState(); THCudaStorage* original = static_cast(tensor)->storage; THCudaStorage_free(state, original); break; @@ -212,10 +212,10 @@ class TorchTensor { size_t size = blob.Size(); switch (blob.dev_mask()) { case cpu::kDevMask: { - THFloatStorage* storage = THFloatStorage_newWithData(static_cast(blob.dptr_), - size); + THFloatStorage* storage = + THFloatStorage_newWithData(static_cast(blob.dptr_), size); THFloatStorage_clearFlag(storage, TH_STORAGE_FREEMEM); - THFloatStorage* original = static_cast(tensor)->storage; + THFloatStorage* original = static_cast(tensor)->storage; static_cast(tensor)->storage = storage; THFloatStorage_free(original); break; @@ -223,12 +223,11 @@ class TorchTensor { #if MXNET_USE_CUDA case gpu::kDevMask: { THCState* state = torchState->CudaState(); - THCudaStorage* storage = THCudaStorage_newWithData(state, - static_cast(blob.dptr_), - size); + THCudaStorage* storage = + THCudaStorage_newWithData(state, static_cast(blob.dptr_), size); // TODO(min): torch bug Cuda version not implemented THFloatStorage_clearFlag(reinterpret_cast(storage), TH_STORAGE_FREEMEM); - THCudaStorage* original = static_cast(tensor)->storage; + THCudaStorage* original = static_cast(tensor)->storage; 
static_cast(tensor)->storage = storage; THCudaStorage_free(state, original); break; @@ -240,9 +239,9 @@ class TorchTensor { } static std::vector TBlobVectorAsTable( - TorchState* torchState, - const std::vector::const_iterator begin, - const std::vector::const_iterator end) { + TorchState* torchState, + const std::vector::const_iterator begin, + const std::vector::const_iterator end) { lua_State* L = torchState->L; std::vector res; int num = end - begin; @@ -269,16 +268,16 @@ class TorchTensor { lua_State* L = torchState->L; if (luaT_isudata(L, -1, TorchTensor::TensorType(cpu::kDevMask))) { CHECK_EQ(dst.dev_mask(), cpu::kDevMask) << "Device type mismatch."; - THFloatTensor* src = static_cast( - luaT_toudata(L, -1, TorchTensor::TensorType(cpu::kDevMask))); + THFloatTensor* src = + static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(cpu::kDevMask))); if (src->storage != static_cast(th_dst)->storage) { THFloatTensor_copy(static_cast(th_dst), src); } #if MXNET_USE_CUDA } else if (luaT_isudata(L, -1, TorchTensor::TensorType(gpu::kDevMask))) { CHECK_EQ(dst.dev_mask(), gpu::kDevMask) << "Device type mismatch."; - THCudaTensor* src = static_cast( - luaT_toudata(L, -1, TorchTensor::TensorType(gpu::kDevMask))); + THCudaTensor* src = + static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(gpu::kDevMask))); if (src->storage != static_cast(th_dst)->storage) { THCudaTensor_copy(torchState->CudaState(), static_cast(th_dst), src); } @@ -294,7 +293,7 @@ class TorchTensor { std::vector::const_iterator th_begin, std::vector::const_iterator th_end) { lua_State* L = torchState->L; - int num = end - begin; + int num = end - begin; CHECK_EQ(th_end - th_begin, num); if (num == 0) { } else if (num == 1) { diff --git a/plugin/torch/torch_criterion-inl.h b/plugin/torch/torch_criterion-inl.h index 2138bd8f1335..c77fc5ab6e47 100644 --- a/plugin/torch/torch_criterion-inl.h +++ b/plugin/torch/torch_criterion-inl.h @@ -22,7 +22,7 @@ * \file torch_module-inl.h * \brief torch module operator * \author Min Lin -*/ + */ #ifndef PLUGIN_TORCH_TORCH_CRITERION_INL_H_ #define PLUGIN_TORCH_TORCH_CRITERION_INL_H_ @@ -46,14 +46,14 @@ struct TorchCriterionParam : public dmlc::Parameter { float grad_scale; DMLC_DECLARE_PARAMETER(TorchCriterionParam) { DMLC_DECLARE_FIELD(lua_string) - .describe("lua string that is called to generate the torch criterion object"); + .describe("lua string that is called to generate the torch criterion object"); DMLC_DECLARE_FIELD(label_shape) - .set_default(mxnet::TShape()) - .enforce_nonzero() - .describe("Shape of label (without batch size)."); + .set_default(mxnet::TShape()) + .enforce_nonzero() + .describe("Shape of label (without batch size)."); DMLC_DECLARE_FIELD(grad_scale) - .set_default(1.0f) - .describe("Scale the gradient by a float factor (a.k.a weight of this loss)."); + .set_default(1.0f) + .describe("Scale the gradient by a float factor (a.k.a weight of this loss)."); } }; @@ -61,7 +61,7 @@ struct TorchCriterionParam : public dmlc::Parameter { * \brief This is the implementation of activation operator. * \tparam xpu The device that the op will be executed on. 
*/ -template +template class TorchCriterionOp : public Operator { private: TorchCriterionParam param_; @@ -70,12 +70,12 @@ class TorchCriterionOp : public Operator { public: explicit TorchCriterionOp(TorchCriterionParam p) { - this->param_ = p; + this->param_ = p; this->torchState_ = new TorchState(); - lua_State *L = torchState_->L; + lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); - std::string exec = std::string("return ") + p.lua_string - + TorchTensor::ModuleType(xpu::kDevMask); + std::string exec = + std::string("return ") + p.lua_string + TorchTensor::ModuleType(xpu::kDevMask); CHECK_EQ(luaL_loadstring(L, exec.c_str()), 0); int err = lua_pcall(L, 0, 1, 0); CHECK_EQ(err, 0) << lua_tostring(L, -1); @@ -87,17 +87,17 @@ class TorchCriterionOp : public Operator { delete this->torchState_; } - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { + virtual void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data, + const std::vector& aux_args) { using namespace mshadow; - lua_State *L = torchState_->L; + lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); CHECK_EQ(in_data.size(), 2); CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); + Stream* s = ctx.get_stream(); torchState_->SetStream(s); lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_); // call forward @@ -117,26 +117,26 @@ class TorchCriterionOp : public Operator { real_t loss = static_cast(lua_tonumber(L, -1)); lua_pop(L, 1); Tensor out = out_data[0].FlatTo2D(s); - Assign(out, req[0], loss*param_.grad_scale); + Assign(out, req[0], loss * param_.grad_scale); lua_pop(L, 1); CHECK_EQ(lua_gettop(L), 0); } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { + virtual void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data, + const std::vector& req, + const std::vector& in_grad, + const std::vector& aux_args) { using namespace mshadow; - lua_State *L = torchState_->L; + lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); CHECK_EQ(in_data.size(), 2); CHECK_EQ(out_data.size(), 1); CHECK_EQ(req[0], kWriteTo) << "Torch Criterion only supports write to in_grad"; CHECK_EQ(req[1], kNullOp) << "Torch Criterion cannot back prop to label"; - Stream *s = ctx.get_stream(); + Stream* s = ctx.get_stream(); torchState_->SetStream(s); lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_); THGeneralTensor th = TorchTensor::TBlobToTHTensor(torchState_, in_grad[0]); @@ -161,7 +161,7 @@ class TorchCriterionOp : public Operator { }; // class TorchCriterionOp // Decalre Factory function, used for dispatch specialization -template +template Operator* CreateOp(TorchCriterionParam type); #if DMLC_USE_CXX11 @@ -183,17 +183,19 @@ class TorchCriterionProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape, + mxnet::ShapeVector* aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2); - const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + const 
mxnet::TShape& dshape = in_shape->at(0); + if (dshape.ndim() == 0) + return false; std::vector lshape; lshape.push_back(dshape[0]); - lshape.insert(lshape.end(), param_.label_shape.data(), - param_.label_shape.data() + param_.label_shape.ndim()); + lshape.insert(lshape.end(), + param_.label_shape.data(), + param_.label_shape.data() + param_.label_shape.ndim()); mxnet::TShape shape(lshape.begin(), lshape.end()); SHAPE_ASSIGN_CHECK(*in_shape, 1, shape); out_shape->clear(); @@ -202,7 +204,7 @@ class TorchCriterionProp : public OperatorProperty { } OperatorProperty* Copy() const override { - auto ptr = new TorchCriterionProp(); + auto ptr = new TorchCriterionProp(); ptr->param_ = param_; return ptr; } @@ -212,10 +214,9 @@ class TorchCriterionProp : public OperatorProperty { } // decalre dependency and inplace optimization options - std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + std::vector DeclareBackwardDependency(const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data) const override { std::vector dep; dep.insert(dep.end(), in_data.begin(), in_data.end()); // Ensure that the backward and forward cannot be called at the same time diff --git a/plugin/torch/torch_criterion.cc b/plugin/torch/torch_criterion.cc index 110a58156a26..7b46a725b618 100644 --- a/plugin/torch/torch_criterion.cc +++ b/plugin/torch/torch_criterion.cc @@ -22,27 +22,27 @@ * \file activation.cc * \brief activation op * \author Junyuan Xie -*/ + */ #include "./torch_criterion-inl.h" #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(TorchCriterionParam param) { +template <> +Operator* CreateOp(TorchCriterionParam param) { return new TorchCriterionOp(param); } // DO_BIND_DISPATCH comes from operator_common.h -Operator *TorchCriterionProp::CreateOperator(Context ctx) const { +Operator* TorchCriterionProp::CreateOperator(Context ctx) const { DO_BIND_DISPATCH(CreateOp, param_); } DMLC_REGISTER_PARAMETER(TorchCriterionParam); MXNET_REGISTER_OP_PROPERTY(TorchCriterion, TorchCriterionProp) -.describe("Criterions from torch.") -.add_arguments(TorchCriterionParam::__FIELDS__()); + .describe("Criterions from torch.") + .add_arguments(TorchCriterionParam::__FIELDS__()); } // namespace op } // namespace mxnet diff --git a/plugin/torch/torch_criterion.cu b/plugin/torch/torch_criterion.cu index ccb7145f36af..0b22722d756a 100644 --- a/plugin/torch/torch_criterion.cu +++ b/plugin/torch/torch_criterion.cu @@ -22,14 +22,14 @@ * \file activation.cc * \brief activation op * \author Bing Xu -*/ + */ #include "./torch_criterion-inl.h" #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(TorchCriterionParam param) { +template <> +Operator* CreateOp(TorchCriterionParam param) { return new TorchCriterionOp(param); } diff --git a/plugin/torch/torch_function.cc b/plugin/torch/torch_function.cc index 3ec9a000acfd..bb802ce67e7a 100644 --- a/plugin/torch/torch_function.cc +++ b/plugin/torch/torch_function.cc @@ -22,7 +22,7 @@ * \file torch_base.cc * \brief torch_state * \author Junyuan Xie -*/ + */ #include "./torch_function.h" namespace mxnet { @@ -51,8 +51,10 @@ MXNET_REGISTER_TORCH_UNARY_FUN(_th_floor, floor); MXNET_REGISTER_TORCH_UNARY_FUN(_th_log, log); MXNET_REGISTER_TORCH_UNARY_FUN(_th_log1p, log1p); MXNET_REGISTER_TORCH_UNARY_FUN(_th_pow, pow) -.add_argument("n", "float", "pow(x, n) returns x^n, 
element-wise. " - "pow(n, x) returns n^x, element-wise."); + .add_argument("n", + "float", + "pow(x, n) returns x^n, element-wise. " + "pow(n, x) returns n^x, element-wise."); MXNET_REGISTER_TORCH_UNARY_FUN(_th_round, round); MXNET_REGISTER_TORCH_UNARY_FUN(_th_sin, sin); MXNET_REGISTER_TORCH_UNARY_FUN(_th_sinh, sinh); @@ -62,7 +64,7 @@ MXNET_REGISTER_TORCH_UNARY_FUN(_th_tanh, tanh); // Basic operations MXNET_REGISTER_TORCH_UNARY_FUN(_th_add_scalar, add) -.add_argument("value", "float", "Add value to all elements in x"); + .add_argument("value", "float", "Add value to all elements in x"); MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(_th_add, add); MXNET_REGISTER_TORCH_BINARY_FUN(_th_add_axpy, add); @@ -70,7 +72,7 @@ MXNET_REGISTER_TORCH_BINARY_FUN(_th_add_axpy, add); // MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(_th_csub, csub); MXNET_REGISTER_TORCH_UNARY_FUN(_th_mul_scalar, mul) -.add_argument("value", "float", "Multiply value to all elements in x"); + .add_argument("value", "float", "Multiply value to all elements in x"); MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(_th_cmul, cmul); MXNET_REGISTER_TORCH_UNARY_FUN(_th_clamp, clamp); @@ -78,7 +80,7 @@ MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(_th_cpow, cpow); MXNET_REGISTER_TORCH_TENARY_FUN(_th_addcmul, addcmul); MXNET_REGISTER_TORCH_UNARY_FUN(_th_div_scalar, div) -.add_argument("value", "float", "Divide all elements in x by value"); + .add_argument("value", "float", "Divide all elements in x by value"); MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(_th_cdiv, cdiv); MXNET_REGISTER_TORCH_TENARY_FUN(_th_addcdiv, addcdiv); @@ -89,67 +91,66 @@ MXNET_REGISTER_TORCH_TENARY_FUN(_th_addbmm, addbmm); MXNET_REGISTER_TORCH_TENARY_FUN(_th_baddbmm, baddbmm); struct TorchMMShape { - static std::vector GetShape(NDArray **u, - const std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { CHECK_EQ(u[0]->shape().ndim(), 2); CHECK_EQ(u[1]->shape().ndim(), 2); CHECK_EQ(u[0]->shape()[1], u[1]->shape()[0]); index_t shape[] = {u[0]->shape()[0], u[1]->shape()[1]}; - mshadow::TShape tshape(shape, shape+2); + mshadow::TShape tshape(shape, shape + 2); return {tshape}; } static constexpr const char* fname = "mm"; - static const int num_inputs = 2; - static const int num_outputs = 1; + static const int num_inputs = 2; + static const int num_outputs = 1; }; MXNET_REGISTER_TORCH_FUN(_th_mm, TorchMMShape); struct TorchMVShape { - static std::vector GetShape(NDArray **u, - const std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { CHECK_EQ(u[0]->shape().ndim(), 2); CHECK_EQ(u[1]->shape().ndim(), 1); CHECK_EQ(u[0]->shape()[1], u[1]->shape()[0]); index_t shape[] = {u[0]->shape()[0]}; - mshadow::TShape tshape(shape, shape+1); + mshadow::TShape tshape(shape, shape + 1); return {tshape}; } static constexpr const char* fname = "mv"; - static const int num_inputs = 2; - static const int num_outputs = 1; + static const int num_inputs = 2; + static const int num_outputs = 1; }; MXNET_REGISTER_TORCH_FUN(_th_mv, TorchMVShape); - struct TorchBMMShape { - static std::vector GetShape(NDArray **u, - const std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { CHECK_EQ(u[0]->shape().ndim(), 3); CHECK_EQ(u[1]->shape().ndim(), 3); CHECK_EQ(u[0]->shape()[0], u[1]->shape()[0]); CHECK_EQ(u[0]->shape()[2], u[1]->shape()[1]); index_t shape[] = {u[0]->shape()[1], u[1]->shape()[2]}; - mshadow::TShape tshape(shape, shape+2); + mshadow::TShape tshape(shape, shape + 2); return {tshape}; } static constexpr const 
char* fname = "bmm"; - static const int num_inputs = 2; - static const int num_outputs = 1; + static const int num_inputs = 2; + static const int num_outputs = 1; }; MXNET_REGISTER_TORCH_FUN(_th_bmm, TorchBMMShape); struct TorchGERShape { - static std::vector GetShape(NDArray **u, - const std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { CHECK_EQ(u[0]->shape().ndim(), 1); CHECK_EQ(u[1]->shape().ndim(), 1); index_t shape[] = {u[0]->shape()[0], u[1]->shape()[0]}; - mshadow::TShape tshape(shape, shape+2); + mshadow::TShape tshape(shape, shape + 2); return {tshape}; } static constexpr const char* fname = "ger"; - static const int num_inputs = 2; - static const int num_outputs = 1; + static const int num_inputs = 2; + static const int num_outputs = 1; }; MXNET_REGISTER_TORCH_FUN(_th_ger, TorchGERShape); diff --git a/plugin/torch/torch_function.h b/plugin/torch/torch_function.h index f6f760231bdf..32917cf7f39c 100644 --- a/plugin/torch/torch_function.h +++ b/plugin/torch/torch_function.h @@ -37,7 +37,7 @@ namespace mxnet { -template +template void TorchRunOp(std::vector arr_in, std::vector arr_out, const std::map& param, @@ -84,16 +84,17 @@ void TorchRunOp(std::vector arr_in, CHECK_EQ(lua_pcall(L, format.size(), 0, 0), 0) << "Lua Error: " << lua_tostring(L, -1); } -template -void TorchOp(NDArray **u, real_t *s, NDArray **out, +template +void TorchOp(NDArray** u, + real_t* s, + NDArray** out, const std::map& param) { std::vector shapes = OP::GetShape(u, param); - CHECK_EQ(shapes.size(), OP::num_outputs) - << "Too many output shapes for TorchOp " << OP::fname; + CHECK_EQ(shapes.size(), OP::num_outputs) << "Too many output shapes for TorchOp " << OP::fname; Context ctx; int type_flag; if (OP::num_inputs) { - ctx = u[0]->ctx(); + ctx = u[0]->ctx(); type_flag = u[0]->dtype(); for (int i = 0; i < OP::num_inputs; ++i) { CHECK_EQ(ctx, u[i]->ctx()) << "Context of all oprands must be the same."; @@ -138,37 +139,49 @@ void TorchOp(NDArray **u, real_t *s, NDArray **out, var_in.resize(std::unique(var_in.begin(), var_in.end()) - var_in.begin()); std::sort(var_out.begin(), var_out.end()); var_out.resize(std::unique(var_out.begin(), var_out.end()) - var_out.begin()); - std::set_difference(var_in.begin(), var_in.end(), var_out.begin(), var_out.end(), + std::set_difference(var_in.begin(), + var_in.end(), + var_out.begin(), + var_out.end(), std::inserter(var_const, var_const.begin())); switch (ctx.dev_mask()) { case mshadow::cpu::kDevMask: { - Engine::Get()->PushSync([arr_in, arr_out, param](RunContext rctx) { - TorchRunOp(arr_in, arr_out, param, rctx); - }, ctx, var_const, var_out); + Engine::Get()->PushSync( + [arr_in, arr_out, param](RunContext rctx) { + TorchRunOp(arr_in, arr_out, param, rctx); + }, + ctx, + var_const, + var_out); break; } #if MXNET_USE_CUDA case gpu::kDevMask: { - Engine::Get()->PushSync([arr_in, arr_out, param](RunContext rctx) { - TorchRunOp(arr_in, arr_out, param, rctx); - }, ctx, var_const, var_out); + Engine::Get()->PushSync( + [arr_in, arr_out, param](RunContext rctx) { + TorchRunOp(arr_in, arr_out, param, rctx); + }, + ctx, + var_const, + var_out); break; } #endif - default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; + default: + LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; } } struct TorchFirstShape { - static std::vector GetShape(NDArray **u, - const std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { return {u[0]->shape()}; } }; struct TorchConstructorShape { - static std::vector GetShape(NDArray **u, - const 
std::map& param) { + static std::vector GetShape(NDArray** u, + const std::map& param) { std::vector shape; std::string format = param.at("format"); std::istringstream args(param.at("args")); @@ -183,53 +196,52 @@ struct TorchConstructorShape { mshadow::TShape tshape(shape.begin(), shape.end()); return {tshape}; } - static const int num_inputs = 0; + static const int num_inputs = 0; static const int num_outputs = 1; }; -#define MXNET_REGISTER_TORCH_FUN(name, OP) \ - MXNET_REGISTER_NDARRAY_FUN(name) \ - .set_function(TorchOp) \ - .set_num_use_vars(OP::num_inputs) \ - .set_num_mutate_vars(OP::num_outputs) \ - .set_type_mask(kAcceptEmptyMutateTarget) - -#define MXNET_REGISTER_TORCH_UNARY_FUN(name, func) \ - struct TorchUnaryOpDesc_ ## name ## _ ## func : public TorchFirstShape { \ - static constexpr const char* fname = #func; \ - static const int num_inputs = 1; \ - static const int num_outputs = 1; \ - }; \ - MXNET_REGISTER_TORCH_FUN(name, TorchUnaryOpDesc_ ## name ## _ ## func) \ - .add_argument("x", "NDArray", "Input NDArray") - -#define MXNET_REGISTER_TORCH_BINARY_FUN(name, func) \ - struct TorchBinaryOpDesc_ ## name ## _ ## func : public TorchFirstShape { \ - static constexpr const char* fname = #func; \ - static const int num_inputs = 2; \ - static const int num_outputs = 1; \ - }; \ - MXNET_REGISTER_TORCH_FUN(name, TorchBinaryOpDesc_ ## name ## _ ## func) - -#define MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(name, func) \ - MXNET_REGISTER_TORCH_BINARY_FUN(name, func) \ - .add_argument("x1", "NDArray", "First Input NDArray") \ - .add_argument("x2", "NDArray", "Second Input NDArray") - -#define MXNET_REGISTER_TORCH_TENARY_FUN(name, func) \ - struct TorchTenaryOpDesc_ ## name ## _ ## func : public TorchFirstShape { \ - static constexpr const char* fname = #func; \ - static const int num_inputs = 3; \ - static const int num_outputs = 1; \ - }; \ - MXNET_REGISTER_TORCH_FUN(name, TorchTenaryOpDesc_ ## name ## _ ## func) - -#define MXNET_REGISTER_TORCH_CONSTRUCTOR_FUN(name, func) \ - struct TorchConstructorOpDesc_ ## name ## _ ## func : public TorchConstructorShape { \ - static constexpr const char* fname = #func; \ - }; \ - MXNET_REGISTER_TORCH_FUN(name, TorchConstructorOpDesc_ ## name ## _ ## func) - +#define MXNET_REGISTER_TORCH_FUN(name, OP) \ + MXNET_REGISTER_NDARRAY_FUN(name) \ + .set_function(TorchOp) \ + .set_num_use_vars(OP::num_inputs) \ + .set_num_mutate_vars(OP::num_outputs) \ + .set_type_mask(kAcceptEmptyMutateTarget) + +#define MXNET_REGISTER_TORCH_UNARY_FUN(name, func) \ + struct TorchUnaryOpDesc_##name##_##func : public TorchFirstShape { \ + static constexpr const char* fname = #func; \ + static const int num_inputs = 1; \ + static const int num_outputs = 1; \ + }; \ + MXNET_REGISTER_TORCH_FUN(name, TorchUnaryOpDesc_##name##_##func) \ + .add_argument("x", "NDArray", "Input NDArray") + +#define MXNET_REGISTER_TORCH_BINARY_FUN(name, func) \ + struct TorchBinaryOpDesc_##name##_##func : public TorchFirstShape { \ + static constexpr const char* fname = #func; \ + static const int num_inputs = 2; \ + static const int num_outputs = 1; \ + }; \ + MXNET_REGISTER_TORCH_FUN(name, TorchBinaryOpDesc_##name##_##func) + +#define MXNET_REGISTER_TORCH_BINARY_FUN_WITH_ARG(name, func) \ + MXNET_REGISTER_TORCH_BINARY_FUN(name, func) \ + .add_argument("x1", "NDArray", "First Input NDArray") \ + .add_argument("x2", "NDArray", "Second Input NDArray") + +#define MXNET_REGISTER_TORCH_TENARY_FUN(name, func) \ + struct TorchTenaryOpDesc_##name##_##func : public TorchFirstShape { \ + static constexpr const 
char* fname = #func; \ + static const int num_inputs = 3; \ + static const int num_outputs = 1; \ + }; \ + MXNET_REGISTER_TORCH_FUN(name, TorchTenaryOpDesc_##name##_##func) + +#define MXNET_REGISTER_TORCH_CONSTRUCTOR_FUN(name, func) \ + struct TorchConstructorOpDesc_##name##_##func : public TorchConstructorShape { \ + static constexpr const char* fname = #func; \ + }; \ + MXNET_REGISTER_TORCH_FUN(name, TorchConstructorOpDesc_##name##_##func) } // namespace mxnet #endif // PLUGIN_TORCH_TORCH_FUNCTION_H_ diff --git a/plugin/torch/torch_module-inl.h b/plugin/torch/torch_module-inl.h index 386f0e31fb43..ef13493ba56b 100644 --- a/plugin/torch/torch_module-inl.h +++ b/plugin/torch/torch_module-inl.h @@ -22,7 +22,7 @@ * \file torch_module-inl.h * \brief torch module operator * \author Min Lin -*/ + */ #ifndef PLUGIN_TORCH_TORCH_MODULE_INL_H_ #define PLUGIN_TORCH_TORCH_MODULE_INL_H_ @@ -47,13 +47,10 @@ struct TorchModuleParam : public dmlc::Parameter { uint32_t num_outputs; DMLC_DECLARE_PARAMETER(TorchModuleParam) { DMLC_DECLARE_FIELD(lua_string) - .describe("lua string that is called to generate the torch module object"); - DMLC_DECLARE_FIELD(num_data) - .describe("the number of input data"); - DMLC_DECLARE_FIELD(num_params) - .describe("the number of parameters"); - DMLC_DECLARE_FIELD(num_outputs) - .describe("the number of outputs"); + .describe("lua string that is called to generate the torch module object"); + DMLC_DECLARE_FIELD(num_data).describe("the number of input data"); + DMLC_DECLARE_FIELD(num_params).describe("the number of parameters"); + DMLC_DECLARE_FIELD(num_outputs).describe("the number of outputs"); } }; @@ -61,7 +58,7 @@ struct TorchModuleParam : public dmlc::Parameter { * \brief This is the implementation of activation operator. * \tparam xpu The device that the op will be executed on. 
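 * \par Example
 * A hypothetical parameter set; the field names come from TorchModuleParam
 * above. nn.Linear(10, 5) exposes a weight and a bias, hence num_params = 2.
 * \code
 * TorchModuleProp prop;
 * prop.Init({{"lua_string", "nn.Linear(10, 5)"},
 *            {"num_data", "1"},
 *            {"num_params", "2"},
 *            {"num_outputs", "1"}});
 * \endcode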
*/ -template +template class TorchModuleOp : public Operator { private: TorchModuleParam param_; @@ -73,8 +70,8 @@ class TorchModuleOp : public Operator { this->param_ = p; lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); - std::string exec = std::string("return ") + p.lua_string - + TorchTensor::ModuleType(xpu::kDevMask); + std::string exec = + std::string("return ") + p.lua_string + TorchTensor::ModuleType(xpu::kDevMask); CHECK_EQ(luaL_loadstring(L, exec.c_str()), 0); int err = lua_pcall(L, 0, 1, 0); CHECK_EQ(err, 0) << lua_tostring(L, -1); @@ -111,25 +108,24 @@ class TorchModuleOp : public Operator { this->lua_reference_ = luaL_ref(L, LUA_REGISTRYINDEX); } - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { + virtual void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data, + const std::vector& aux_args) { lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); CHECK_EQ(in_data.size(), param_.num_params + param_.num_data); CHECK_EQ(out_data.size(), param_.num_outputs); - mshadow::Stream *s = ctx.get_stream(); + mshadow::Stream* s = ctx.get_stream(); torchState_->SetStream(s); // Deserialize self table lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_); - std::vector th_output = - TorchTensor::TBlobVectorAsTable(torchState_, out_data.begin(), - out_data.begin() + param_.num_outputs); + std::vector th_output = TorchTensor::TBlobVectorAsTable( + torchState_, out_data.begin(), out_data.begin() + param_.num_outputs); // set the output field lua_setfield(L, -2, "output"); // set the parameters @@ -157,38 +153,40 @@ class TorchModuleOp : public Operator { // | self | updateOutput lua_pushvalue(L, -2); // | self | updateOutput | self - TorchTensor::TBlobVectorAsTable(torchState_, in_data.begin(), - in_data.begin() + param_.num_data); + TorchTensor::TBlobVectorAsTable( + torchState_, in_data.begin(), in_data.begin() + param_.num_data); // | self | updateOutput | self | inputs int err = lua_pcall(L, 2, 1, 0); // doesn't need the output CHECK_EQ(err, 0) << lua_tostring(L, -1); - TorchTensor::CheckOutput(torchState_, out_data.begin(), out_data.begin() + param_.num_outputs, - th_output.begin(), th_output.end()); + TorchTensor::CheckOutput(torchState_, + out_data.begin(), + out_data.begin() + param_.num_outputs, + th_output.begin(), + th_output.end()); lua_pop(L, 2); CHECK_EQ(lua_gettop(L), 0); } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { + virtual void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data, + const std::vector& req, + const std::vector& in_grad, + const std::vector& aux_args) { lua_State* L = torchState_->L; CHECK_EQ(lua_gettop(L), 0); CHECK_EQ(in_data.size(), param_.num_params + param_.num_data); CHECK_EQ(out_data.size(), param_.num_outputs); CHECK_EQ(out_grad.size(), param_.num_outputs); CHECK_EQ(in_grad.size(), param_.num_params + param_.num_data); - mshadow::Stream *s = ctx.get_stream(); + mshadow::Stream* s = ctx.get_stream(); torchState_->SetStream(s); lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_); TorchTensor::TBlobVectorAsTable(torchState_, out_data.begin(), out_data.end()); lua_setfield(L, -2, "output"); - std::vector th_grad = - 
TorchTensor::TBlobVectorAsTable(torchState_, in_grad.begin(), - in_grad.begin() + param_.num_data); + std::vector th_grad = TorchTensor::TBlobVectorAsTable( + torchState_, in_grad.begin(), in_grad.begin() + param_.num_data); lua_setfield(L, -2, "gradInput"); if (param_.num_params != 0) { // get the parameters into the stack @@ -201,20 +199,21 @@ class TorchModuleOp : public Operator { std::vector::const_iterator it = in_data.begin() + param_.num_data; while (lua_next(L, -3)) { TorchTensor::SetInternal( - torchState_, - static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(*it))), - *it); + torchState_, + static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(*it))), + *it); it++; lua_pop(L, 1); } // iterate the grad of params lua_pushnil(L); - it = in_grad.begin() + param_.num_data;; + it = in_grad.begin() + param_.num_data; + ; while (lua_next(L, -2)) { TorchTensor::SetInternal( - torchState_, - static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(*it))), - *it); + torchState_, + static_cast(luaT_toudata(L, -1, TorchTensor::TensorType(*it))), + *it); it++; lua_pop(L, 1); } @@ -223,8 +222,8 @@ class TorchModuleOp : public Operator { lua_getfield(L, -1, "zeroGradParameters"); lua_pushvalue(L, -2); CHECK_EQ(lua_pcall(L, 1, 0, 0), 0); - TorchTensor::TBlobVectorAsTable(torchState_, in_data.begin(), - in_data.begin() + param_.num_data); + TorchTensor::TBlobVectorAsTable( + torchState_, in_data.begin(), in_data.begin() + param_.num_data); TorchTensor::TBlobVectorAsTable(torchState_, out_grad.begin(), out_grad.end()); // call lua_getfield(L, -3, "accGradParameters"); @@ -240,15 +239,18 @@ class TorchModuleOp : public Operator { lua_pushvalue(L, -4); err = lua_pcall(L, 3, 1, 0); // doesn't need the output CHECK_EQ(err, 0) << lua_tostring(L, -1); - TorchTensor::CheckOutput(torchState_, in_grad.begin(), in_grad.begin() + param_.num_data, - th_grad.begin(), th_grad.end()); + TorchTensor::CheckOutput(torchState_, + in_grad.begin(), + in_grad.begin() + param_.num_data, + th_grad.begin(), + th_grad.end()); lua_pop(L, 4); CHECK_EQ(lua_gettop(L), 0); } }; // class TorchModuleOp // Declare Factory function, used for dispatch specialization -template +template Operator* CreateOp(TorchModuleParam type, TorchState* torchState); #if DMLC_USE_CXX11 @@ -260,8 +262,8 @@ class TorchModuleProp : public OperatorProperty { void InitTorchState() const { this->torchState_ = new TorchState(); - lua_State* L = torchState_->L; - std::string exec = std::string("return ") + param_.lua_string; + lua_State* L = torchState_->L; + std::string exec = std::string("return ") + param_.lua_string; CHECK_EQ(luaL_loadstring(L, exec.c_str()), 0); int err = lua_pcall(L, 0, LUA_MULTRET, 0); CHECK_EQ(lua_gettop(L), 1); @@ -277,8 +279,7 @@ class TorchModuleProp : public OperatorProperty { } public: - TorchModuleProp() : OperatorProperty(), torchState_(NULL), lua_reference_(-1) { - } + TorchModuleProp() : OperatorProperty(), torchState_(NULL), lua_reference_(-1) {} std::vector ListArguments() const override { if (!torchState_) { @@ -347,9 +348,9 @@ class TorchModuleProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape, + mxnet::ShapeVector* aux_shape) const override { if (torchState_ == nullptr) { this->InitTorchState(); } @@ -394,9 +395,9 @@ class TorchModuleProp : public OperatorProperty { lua_pushnil(L); int index = 
param_.num_data; while (lua_next(L, -3)) { - THFloatTensor* param = reinterpret_cast(luaT_toudata(L, -1, - TorchTensor::TensorType(mshadow::cpu::kDevMask))); - long int* size = param->size; // NOLINT(*) + THFloatTensor* param = reinterpret_cast( + luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); + long int* size = param->size; // NOLINT(*) (*in_shape)[index++] = mxnet::TShape(size, size + THFloatTensor_nDimension(param)); lua_pop(L, 1); } @@ -405,18 +406,18 @@ class TorchModuleProp : public OperatorProperty { lua_getfield(L, -1, "output"); if (param_.num_outputs == 0) { } else if (param_.num_outputs == 1) { - THFloatTensor* output = reinterpret_cast(luaT_toudata(L, -1, - TorchTensor::TensorType(mshadow::cpu::kDevMask))); - long int* size = output->size; // NOLINT(*) + THFloatTensor* output = reinterpret_cast( + luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); + long int* size = output->size; // NOLINT(*) (*out_shape)[0] = mxnet::TShape(size, size + THFloatTensor_nDimension(output)); } else { for (uint32_t data_index = 0; data_index < param_.num_outputs; ++data_index) { lua_pushnil(L); int index = 0; while (lua_next(L, -2)) { - THFloatTensor* out = reinterpret_cast(luaT_toudata(L, -1, - TorchTensor::TensorType(mshadow::cpu::kDevMask))); - long int* size = out->size; // NOLINT(*) + THFloatTensor* out = reinterpret_cast( + luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); + long int* size = out->size; // NOLINT(*) (*out_shape)[index++] = mxnet::TShape(size, size + THFloatTensor_nDimension(out)); } } @@ -427,7 +428,7 @@ class TorchModuleProp : public OperatorProperty { } OperatorProperty* Copy() const override { - auto ptr = new TorchModuleProp(); + auto ptr = new TorchModuleProp(); ptr->param_ = param_; return ptr; } @@ -437,10 +438,9 @@ class TorchModuleProp : public OperatorProperty { } // decalre dependency and inplace optimization options - std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { + std::vector DeclareBackwardDependency(const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data) const override { std::vector dep; dep.insert(dep.end(), out_grad.begin(), out_grad.end()); dep.insert(dep.end(), out_data.begin(), out_data.end()); diff --git a/plugin/torch/torch_module.cc b/plugin/torch/torch_module.cc index 4ab792c4dd58..ad6fa8a49d50 100644 --- a/plugin/torch/torch_module.cc +++ b/plugin/torch/torch_module.cc @@ -22,27 +22,27 @@ * \file activation.cc * \brief activation op * \author Bing Xu -*/ + */ #include "./torch_module-inl.h" #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(TorchModuleParam param, TorchState* torchState) { +template <> +Operator* CreateOp(TorchModuleParam param, TorchState* torchState) { return new TorchModuleOp(param, torchState); } // DO_BIND_DISPATCH comes from operator_common.h -Operator *TorchModuleProp::CreateOperator(Context ctx) const { +Operator* TorchModuleProp::CreateOperator(Context ctx) const { DO_BIND_DISPATCH(CreateOp, param_, torchState_); } DMLC_REGISTER_PARAMETER(TorchModuleParam); MXNET_REGISTER_OP_PROPERTY(TorchModule, TorchModuleProp) -.describe("Modules from torch.") -.add_arguments(TorchModuleParam::__FIELDS__()); + .describe("Modules from torch.") + .add_arguments(TorchModuleParam::__FIELDS__()); } // namespace op } // namespace mxnet diff --git a/plugin/torch/torch_module.cu 
b/plugin/torch/torch_module.cu index d743da5fd922..b6ac7f8cbd19 100644 --- a/plugin/torch/torch_module.cu +++ b/plugin/torch/torch_module.cu @@ -22,14 +22,14 @@ * \file activation.cc * \brief activation op * \author Bing Xu -*/ + */ #include "./torch_module-inl.h" #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(TorchModuleParam param, TorchState* torchState) { +template <> +Operator* CreateOp(TorchModuleParam param, TorchState* torchState) { return new TorchModuleOp(param, torchState); } diff --git a/plugin/warpctc/warpctc-inl.h b/plugin/warpctc/warpctc-inl.h index 9fcbedce74f1..dcc581765f62 100644 --- a/plugin/warpctc/warpctc-inl.h +++ b/plugin/warpctc/warpctc-inl.h @@ -22,7 +22,7 @@ * \file warpctc-inl.h * \brief warpctc operator * \author Liang Xiang -*/ + */ #ifndef PLUGIN_WARPCTC_WARPCTC_INL_H_ #define PLUGIN_WARPCTC_WARPCTC_INL_H_ @@ -43,25 +43,21 @@ namespace mxnet { namespace op { namespace warpctc_enum { - enum CTCOpInputs {kData, kLabel}; - enum CTCOpOutputs {kOut}; - enum CTCTemp {kTmp}; +enum CTCOpInputs { kData, kLabel }; +enum CTCOpOutputs { kOut }; +enum CTCTemp { kTmp }; } // namespace warpctc_enum struct WarpCTCParam : public dmlc::Parameter { int label_length; int input_length; DMLC_DECLARE_PARAMETER(WarpCTCParam) { - DMLC_DECLARE_FIELD(label_length) - .set_default(0) - .describe("Real label length"); - DMLC_DECLARE_FIELD(input_length) - .set_default(0) - .describe("Input length"); + DMLC_DECLARE_FIELD(label_length).set_default(0).describe("Real label length"); + DMLC_DECLARE_FIELD(input_length).set_default(0).describe("Input length"); } }; -template +template class WarpCTCOp : public Operator { private: WarpCTCParam param_; @@ -71,37 +67,37 @@ class WarpCTCOp : public Operator { this->param_ = p; } - ~WarpCTCOp() { - } + ~WarpCTCOp() {} inline void throw_on_error(ctcStatus_t status, const char* message) { if (status != CTC_STATUS_SUCCESS) { - throw std::runtime_error(message - + (", stat = " - + std::string(ctcGetStatusString(status)))); + throw std::runtime_error(message + (", stat = " + std::string(ctcGetStatusString(status)))); } } - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { + virtual void Forward(const OpContext& ctx, + const std::vector& in_data, + const std::vector& req, + const std::vector& out_data, + const std::vector& aux_args) { using namespace mshadow; using namespace mshadow::expr; CHECK_EQ(in_data.size(), 2) << "CTCOutput Input: [data, label]"; CHECK_EQ(out_data.size(), 1) << "CTCOutput Output: [output]"; - Stream *s = ctx.get_stream(); - TBlob data = in_data[warpctc_enum::kData]; - TBlob out = out_data[warpctc_enum::kOut]; + Stream* s = ctx.get_stream(); + TBlob data = in_data[warpctc_enum::kData]; + TBlob out = out_data[warpctc_enum::kOut]; Tensor data_tensor = data.FlatTo2D(s); - Tensor out_tensor = out.FlatTo2D(s); + Tensor out_tensor = out.FlatTo2D(s); Softmax(out_tensor, data_tensor); } - std::vector labelLengths(const int * flat_labels, int minibatch, - int size, int blank, int * total_length) { + std::vector labelLengths(const int* flat_labels, + int minibatch, + int size, + int blank, + int* total_length) { CHECK_EQ(param_.label_length * minibatch, size) << "label size should = label_length * minibatch"; std::vector ret(minibatch, 0); @@ -116,8 +112,7 @@ class WarpCTCOp : public Operator { return ret; } - void removeBlank(const int * flat_labels, int * cpu_labels, - int 
size, int blank) { + void removeBlank(const int* flat_labels, int* cpu_labels, int size, int blank) { int k = 0; for (int i = 0; i < size; i++) { if (flat_labels[i] != blank) { @@ -127,25 +122,25 @@ class WarpCTCOp : public Operator { } } - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { + virtual void Backward(const OpContext& ctx, + const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data, + const std::vector& req, + const std::vector& in_grad, + const std::vector& aux_args) { using namespace mshadow; - Stream *s = ctx.get_stream(); - TBlob data = in_data[warpctc_enum::kData]; - TBlob label = in_data[warpctc_enum::kLabel]; + Stream* s = ctx.get_stream(); + TBlob data = in_data[warpctc_enum::kData]; + TBlob label = in_data[warpctc_enum::kLabel]; CHECK_EQ(data.shape_.ndim(), 2) << "input data shape should be 2 (t*n, p)"; - ctcOptions info; //please updated to latest baidu/warp-ctc NOLINT(*) + ctcOptions info; // please updated to latest baidu/warp-ctc NOLINT(*) if (data.dev_mask() == cpu::kDevMask) { - info.loc = CTC_CPU; + info.loc = CTC_CPU; info.num_threads = 1; } else if (data.dev_mask() == gpu::kDevMask) { #if MXNET_USE_CUDA - info.loc = CTC_GPU; + info.loc = CTC_GPU; info.stream = ctx.get_stream()->stream_; } else { #endif @@ -153,8 +148,8 @@ class WarpCTCOp : public Operator { } info.blank_label = 0; - int T = param_.input_length; - int minibatch = data.shape_[0] / T; + int T = param_.input_length; + int minibatch = data.shape_[0] / T; int alphabet_size = data.shape_[1]; std::vector input_lengths; for (int i = 0; i < minibatch; i++) { @@ -164,15 +159,16 @@ class WarpCTCOp : public Operator { #if MXNET_USE_CUDA cudaError_t cuda_status; #endif - float* activations = static_cast(data.dptr_); - int* flat_labels = static_cast(label.dptr_); + float* activations = static_cast(data.dptr_); + int* flat_labels = static_cast(label.dptr_); int* cpu_raw_labels = flat_labels; - float* grads = static_cast(in_grad[warpctc_enum::kData].dptr_); + float* grads = static_cast(in_grad[warpctc_enum::kData].dptr_); if (data.dev_mask() == gpu::kDevMask) { #if MXNET_USE_CUDA cpu_raw_labels = reinterpret_cast(malloc(sizeof(int) * label.Size())); - cuda_status = cudaMemcpyAsync(cpu_raw_labels, flat_labels, - label.Size()*sizeof(int), + cuda_status = cudaMemcpyAsync(cpu_raw_labels, + flat_labels, + label.Size() * sizeof(int), cudaMemcpyDeviceToHost, ctx.get_stream()->stream_); CHECK_EQ(cuda_status, cudaSuccess) << "cuda memcpy label error"; @@ -180,24 +176,22 @@ class WarpCTCOp : public Operator { } int total_label_length = 0; - std::vector label_lengths = labelLengths(cpu_raw_labels, - minibatch, - label.Size(), - 0, &total_label_length); - int* cpu_labels = reinterpret_cast( - malloc(sizeof(int) * total_label_length)); + std::vector label_lengths = + labelLengths(cpu_raw_labels, minibatch, label.Size(), 0, &total_label_length); + int* cpu_labels = reinterpret_cast(malloc(sizeof(int) * total_label_length)); removeBlank(cpu_raw_labels, cpu_labels, label.Size(), 0); size_t alloc_bytes; throw_on_error(get_workspace_size(label_lengths.data(), input_lengths.data(), alphabet_size, - input_lengths.size(), info, + input_lengths.size(), + info, &alloc_bytes), "Error: get_workspace_size in inf_test"); - Tensor ctc_workspace = ctx.requested[warpctc_enum::kTmp].get_space( - mshadow::Shape1(alloc_bytes), s); + Tensor 
ctc_workspace = + ctx.requested[warpctc_enum::kTmp].get_space(mshadow::Shape1(alloc_bytes), s); std::vector costs(minibatch); throw_on_error(compute_ctc_loss(activations, @@ -223,10 +217,9 @@ class WarpCTCOp : public Operator { } }; -template +template Operator* CreateOp(WarpCTCParam type); - #if DMLC_USE_CXX11 class WarpCTCProp : public OperatorProperty { public: @@ -238,8 +231,7 @@ class WarpCTCProp : public OperatorProperty { return {"output"}; } - void Init(const std::vector >& kwargs) - override { + void Init(const std::vector >& kwargs) override { param_.Init(kwargs); } @@ -247,13 +239,14 @@ class WarpCTCProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape, + mxnet::ShapeVector* aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]"; - const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + const mxnet::TShape& dshape = in_shape->at(0); + if (dshape.ndim() == 0) + return false; mxnet::TShape label_shape(dshape.ndim() - 1, 1); label_shape[0] = param_.label_length * (dshape[0] / param_.input_length); SHAPE_ASSIGN_CHECK(*in_shape, warpctc_enum::kLabel, label_shape); @@ -263,9 +256,9 @@ class WarpCTCProp : public OperatorProperty { return true; } - virtual bool InferType(std::vector *in_type, - std::vector *out_type, - std::vector *aux_type) const { + virtual bool InferType(std::vector* in_type, + std::vector* out_type, + std::vector* aux_type) const { CHECK_LE(in_type->size(), this->ListArguments().size()); in_type->clear(); in_type->push_back(mshadow::kFloat32); @@ -275,13 +268,12 @@ class WarpCTCProp : public OperatorProperty { return true; } - std::vector BackwardResource( - const mxnet::ShapeVector &in_shape) const override { + std::vector BackwardResource(const mxnet::ShapeVector& in_shape) const override { return {ResourceRequest::kTempSpace}; } OperatorProperty* Copy() const override { - auto ptr = new WarpCTCProp(); + auto ptr = new WarpCTCProp(); ptr->param_ = param_; return ptr; } @@ -290,14 +282,11 @@ class WarpCTCProp : public OperatorProperty { return "WarpCTC"; } - - std::vector DeclareBackwardDependency(const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) - const override { - return {in_data[warpctc_enum::kData], - in_data[warpctc_enum::kLabel], - out_data[warpctc_enum::kOut]}; + std::vector DeclareBackwardDependency(const std::vector& out_grad, + const std::vector& in_data, + const std::vector& out_data) const override { + return { + in_data[warpctc_enum::kData], in_data[warpctc_enum::kLabel], out_data[warpctc_enum::kOut]}; } Operator* CreateOperator(Context ctx) const override; diff --git a/plugin/warpctc/warpctc.cc b/plugin/warpctc/warpctc.cc index aac36a375a9e..754883ab226b 100644 --- a/plugin/warpctc/warpctc.cc +++ b/plugin/warpctc/warpctc.cc @@ -22,29 +22,29 @@ * \file warpctc.cc * \brief warpctc op * \author Liang Xiang -*/ + */ #include "./warpctc-inl.h" #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(WarpCTCParam param) { +template <> +Operator* CreateOp(WarpCTCParam param) { return new WarpCTCOp(param); } -Operator *WarpCTCProp::CreateOperator(Context ctx) const { +Operator* WarpCTCProp::CreateOperator(Context ctx) const { DO_BIND_DISPATCH(CreateOp, param_); } 
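// The Backward implementation above follows warp-ctc's two-call protocol:
// size the scratch space with get_workspace_size, then run compute_ctc_loss
// over the same length arrays. A minimal CPU-only sketch under that
// assumption (signatures as used in this file; the wrapper name is
// hypothetical):
#include <ctc.h>
#include <vector>
static float CtcLossCpu(const float* activations, float* grads, const int* labels,
                        const std::vector<int>& label_lengths,
                        const std::vector<int>& input_lengths, int alphabet_size) {
  ctcOptions info{};
  info.loc         = CTC_CPU;
  info.num_threads = 1;
  info.blank_label = 0;
  size_t bytes = 0;
  // First call only reports how much workspace the loss computation needs.
  get_workspace_size(label_lengths.data(), input_lengths.data(), alphabet_size,
                     static_cast<int>(input_lengths.size()), info, &bytes);
  std::vector<char> workspace(bytes);
  std::vector<float> costs(input_lengths.size());
  compute_ctc_loss(activations, grads, labels, label_lengths.data(),
                   input_lengths.data(), alphabet_size,
                   static_cast<int>(input_lengths.size()), costs.data(),
                   workspace.data(), info);
  float total = 0.f;
  for (float c : costs)
    total += c;  // warp-ctc reports one cost per minibatch element
  return total;
}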
DMLC_REGISTER_PARAMETER(WarpCTCParam); MXNET_REGISTER_OP_PROPERTY(WarpCTC, WarpCTCProp) -.add_argument("data", "NDArray-or-Symbol", "Input data.") -.add_argument("label", "NDArray-or-Symbol", "Input label.") -.describe("warp ctc.") -.add_arguments(WarpCTCParam::__FIELDS__()); + .add_argument("data", "NDArray-or-Symbol", "Input data.") + .add_argument("label", "NDArray-or-Symbol", "Input label.") + .describe("warp ctc.") + .add_arguments(WarpCTCParam::__FIELDS__()); } // namespace op } // namespace mxnet diff --git a/plugin/warpctc/warpctc.cu b/plugin/warpctc/warpctc.cu index 3ee20fc9d3fe..093a1498823a 100644 --- a/plugin/warpctc/warpctc.cu +++ b/plugin/warpctc/warpctc.cu @@ -22,15 +22,15 @@ * \file warpctc.cc * \brief warpctc op * \author Liang Xiang -*/ + */ #include "./warpctc-inl.h" #include #include "../../src/operator/mshadow_op.h" namespace mxnet { namespace op { -template<> -Operator *CreateOp(WarpCTCParam param) { +template <> +Operator* CreateOp(WarpCTCParam param) { return new WarpCTCOp(param); } From 2ae66a0954a506db90d99f5fe26ffdb8197ce1fd Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 4 Nov 2021 09:02:09 +0100 Subject: [PATCH 07/10] [TOOLS] Re-format .cc .h files --- tools/im2rec.cc | 275 +++++++++++++++++++++++++++--------------------- 1 file changed, 156 insertions(+), 119 deletions(-) diff --git a/tools/im2rec.cc b/tools/im2rec.cc index 1c4071a23eee..db8df4481582 100644 --- a/tools/im2rec.cc +++ b/tools/im2rec.cc @@ -42,51 +42,68 @@ #include "../src/io/image_recordio.h" #include /*! - *\brief get interpolation method with given inter_method, 0-CV_INTER_NN 1-CV_INTER_LINEAR 2-CV_INTER_CUBIC - *\ 3-CV_INTER_AREA 4-CV_INTER_LANCZOS4 9-AUTO(cubic for enlarge, area for shrink, bilinear for others) 10-RAND(0-4) + *\brief get interpolation method with given inter_method, 0-CV_INTER_NN 1-CV_INTER_LINEAR + *2-CV_INTER_CUBIC \ 3-CV_INTER_AREA 4-CV_INTER_LANCZOS4 9-AUTO(cubic for enlarge, area for shrink, + *bilinear for others) 10-RAND(0-4) */ -int GetInterMethod(int inter_method, int old_width, int old_height, int new_width, int new_height, std::mt19937& prnd) { - if (inter_method == 9) { - if (new_width > old_width && new_height > old_height) { - return 2; // CV_INTER_CUBIC for enlarge - } else if (new_width rand_uniform_int(0, 4); - return rand_uniform_int(prnd); +int GetInterMethod(int inter_method, + int old_width, + int old_height, + int new_width, + int new_height, + std::mt19937& prnd) { + if (inter_method == 9) { + if (new_width > old_width && new_height > old_height) { + return 2; // CV_INTER_CUBIC for enlarge + } else if (new_width < old_width && new_height < old_height) { + return 3; // CV_INTER_AREA for shrink } else { - return inter_method; + return 1; // CV_INTER_LINEAR for others } + } else if (inter_method == 10) { + std::uniform_int_distribution rand_uniform_int(0, 4); + return rand_uniform_int(prnd); + } else { + return inter_method; + } } -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { if (argc < 4) { - printf("Usage: [additional parameters in form key=value]\n"\ - "Possible additional parameters:\n"\ - "\tcolor=USE_COLOR[default=1] Force color (1), gray image (0) or keep source unchanged (-1).\n"\ - "\tresize=newsize resize the shorter edge of image to the newsize, original images will be packed by default\n"\ - "\tlabel_width=WIDTH[default=1] specify the label_width in the list, by default set to 1\n"\ - "\tpack_label=PACK_LABEL[default=0] whether to also pack multi dimenional label in the record file\n"\ - 
"\tnsplit=NSPLIT[default=1] used for part generation, logically split the image.list to NSPLIT parts by position\n"\ - "\tpart=PART[default=0] used for part generation, pack the images from the specific part in image.list\n"\ - "\tcenter_crop=CENTER_CROP[default=0] specify whether to crop the center image to make it square.\n"\ - "\tquality=QUALITY[default=95] JPEG quality for encoding (1-100, default: 95) or PNG compression for encoding (1-9, default: 3).\n"\ - "\tencoding=ENCODING[default='.jpg'] Encoding type. Can be '.jpg' or '.png'\n"\ - "\tinter_method=INTER_METHOD[default=1] NN(0) BILINEAR(1) CUBIC(2) AREA(3) LANCZOS4(4) AUTO(9) RAND(10).\n"\ - "\tunchanged=UNCHANGED[default=0] Keep the original image encoding, size and color. If set to 1, it will ignore the others parameters.\n"); + printf( + "Usage: [additional parameters in form " + "key=value]\n" + "Possible additional parameters:\n" + "\tcolor=USE_COLOR[default=1] Force color (1), gray image (0) or keep source unchanged " + "(-1).\n" + "\tresize=newsize resize the shorter edge of image to the newsize, original images will be " + "packed by default\n" + "\tlabel_width=WIDTH[default=1] specify the label_width in the list, by default set to 1\n" + "\tpack_label=PACK_LABEL[default=0] whether to also pack multi dimenional label in the " + "record file\n" + "\tnsplit=NSPLIT[default=1] used for part generation, logically split the image.list to " + "NSPLIT parts by position\n" + "\tpart=PART[default=0] used for part generation, pack the images from the specific part " + "in image.list\n" + "\tcenter_crop=CENTER_CROP[default=0] specify whether to crop the center image to make it " + "square.\n" + "\tquality=QUALITY[default=95] JPEG quality for encoding (1-100, default: 95) or PNG " + "compression for encoding (1-9, default: 3).\n" + "\tencoding=ENCODING[default='.jpg'] Encoding type. Can be '.jpg' or '.png'\n" + "\tinter_method=INTER_METHOD[default=1] NN(0) BILINEAR(1) CUBIC(2) AREA(3) LANCZOS4(4) " + "AUTO(9) RAND(10).\n" + "\tunchanged=UNCHANGED[default=0] Keep the original image encoding, size and color. 
If set " + "to 1, it will ignore the others parameters.\n"); return 0; } - int label_width = 1; - int pack_label = 0; - int new_size = -1; - int nsplit = 1; - int partid = 0; - int center_crop = 0; - int quality = 95; - int color_mode = CV_LOAD_IMAGE_COLOR; - int unchanged = 0; + int label_width = 1; + int pack_label = 0; + int new_size = -1; + int nsplit = 1; + int partid = 0; + int center_crop = 0; + int quality = 95; + int color_mode = CV_LOAD_IMAGE_COLOR; + int unchanged = 0; int inter_method = CV_INTER_LINEAR; std::string encoding(".jpg"); for (int i = 4; i < argc; ++i) { @@ -100,17 +117,28 @@ int main(int argc, char *argv[]) { #endif if (effct_len == 2) { - if (!strcmp(key, "resize")) new_size = atoi(val); - if (!strcmp(key, "label_width")) label_width = atoi(val); - if (!strcmp(key, "pack_label")) pack_label = atoi(val); - if (!strcmp(key, "nsplit")) nsplit = atoi(val); - if (!strcmp(key, "part")) partid = atoi(val); - if (!strcmp(key, "center_crop")) center_crop = atoi(val); - if (!strcmp(key, "quality")) quality = atoi(val); - if (!strcmp(key, "color")) color_mode = atoi(val); - if (!strcmp(key, "encoding")) encoding = std::string(val); - if (!strcmp(key, "unchanged")) unchanged = atoi(val); - if (!strcmp(key, "inter_method")) inter_method = atoi(val); + if (!strcmp(key, "resize")) + new_size = atoi(val); + if (!strcmp(key, "label_width")) + label_width = atoi(val); + if (!strcmp(key, "pack_label")) + pack_label = atoi(val); + if (!strcmp(key, "nsplit")) + nsplit = atoi(val); + if (!strcmp(key, "part")) + partid = atoi(val); + if (!strcmp(key, "center_crop")) + center_crop = atoi(val); + if (!strcmp(key, "quality")) + quality = atoi(val); + if (!strcmp(key, "color")) + color_mode = atoi(val); + if (!strcmp(key, "encoding")) + encoding = std::string(val); + if (!strcmp(key, "unchanged")) + unchanged = atoi(val); + if (!strcmp(key, "inter_method")) + inter_method = atoi(val); } } // Check parameters ranges @@ -140,43 +168,42 @@ int main(int argc, char *argv[]) { LOG(INFO) << "Encoding is " << encoding; if (encoding == std::string(".png") && quality > 9) { - quality = 3; + quality = 3; } if (inter_method != 1) { - switch (inter_method) { - case 0: - LOG(INFO) << "Use inter_method CV_INTER_NN"; - break; - case 2: - LOG(INFO) << "Use inter_method CV_INTER_CUBIC"; - break; - case 3: - LOG(INFO) << "Use inter_method CV_INTER_AREA"; - break; - case 4: - LOG(INFO) << "Use inter_method CV_INTER_LANCZOS4"; - break; - case 9: - LOG(INFO) << "Use inter_method mod auto(cubic for enlarge, area for shrink)"; - break; - case 10: - LOG(INFO) << "Use inter_method mod rand(nn/bilinear/cubic/area/lanczos4)"; - break; - default: - LOG(INFO) << "Unkown inter_method"; - return 0; - } + switch (inter_method) { + case 0: + LOG(INFO) << "Use inter_method CV_INTER_NN"; + break; + case 2: + LOG(INFO) << "Use inter_method CV_INTER_CUBIC"; + break; + case 3: + LOG(INFO) << "Use inter_method CV_INTER_AREA"; + break; + case 4: + LOG(INFO) << "Use inter_method CV_INTER_LANCZOS4"; + break; + case 9: + LOG(INFO) << "Use inter_method mod auto(cubic for enlarge, area for shrink)"; + break; + case 10: + LOG(INFO) << "Use inter_method mod rand(nn/bilinear/cubic/area/lanczos4)"; + break; + default: + LOG(INFO) << "Unkown inter_method"; + return 0; + } } std::random_device rd; std::mt19937 prnd(rd()); using namespace dmlc; const static size_t kBufferSize = 1 << 20UL; - std::string root = argv[2]; + std::string root = argv[2]; mxnet::io::ImageRecordIO rec; - size_t imcnt = 0; - double tstart = dmlc::GetTime(); - 
dmlc::InputSplit *flist = dmlc::InputSplit:: - Create(argv[1], partid, nsplit, "text"); + size_t imcnt = 0; + double tstart = dmlc::GetTime(); + dmlc::InputSplit* flist = dmlc::InputSplit::Create(argv[1], partid, nsplit, "text"); std::ostringstream os; if (nsplit == 1) { os << argv[3]; @@ -184,7 +211,7 @@ int main(int argc, char *argv[]) { os << argv[3] << ".part" << std::setw(3) << std::setfill('0') << partid; } LOG(INFO) << "Write to output: " << os.str(); - dmlc::Stream *fo = dmlc::Stream::Create(os.str().c_str(), "w"); + dmlc::Stream* fo = dmlc::Stream::Create(os.str().c_str(), "w"); LOG(INFO) << "Output: " << os.str(); dmlc::RecordIOWriter writer(fo); std::string fname, path, blob; @@ -192,13 +219,13 @@ int main(int argc, char *argv[]) { std::vector encode_buf; std::vector encode_params; if (encoding == std::string(".png")) { - encode_params.push_back(CV_IMWRITE_PNG_COMPRESSION); - encode_params.push_back(quality); - LOG(INFO) << "PNG encoding compression: " << quality; + encode_params.push_back(CV_IMWRITE_PNG_COMPRESSION); + encode_params.push_back(quality); + LOG(INFO) << "PNG encoding compression: " << quality; } else { - encode_params.push_back(CV_IMWRITE_JPEG_QUALITY); - encode_params.push_back(quality); - LOG(INFO) << "JPEG encoding quality: " << quality; + encode_params.push_back(CV_IMWRITE_JPEG_QUALITY); + encode_params.push_back(quality); + LOG(INFO) << "JPEG encoding quality: " << quality; } dmlc::InputSplit::Blob line; std::vector label_buf(label_width, 0.f); @@ -206,32 +233,32 @@ int main(int argc, char *argv[]) { while (flist->NextRecord(&line)) { std::string sline(static_cast(line.dptr), line.size); std::istringstream is(sline); - if (!(is >> rec.header.image_id[0] >> rec.header.label)) continue; + if (!(is >> rec.header.image_id[0] >> rec.header.label)) + continue; label_buf[0] = rec.header.label; for (int k = 1; k < label_width; ++k) { - CHECK(is >> label_buf[k]) - << "Invalid ImageList, did you provide the correct label_width?"; + CHECK(is >> label_buf[k]) << "Invalid ImageList, did you provide the correct label_width?"; } - if (pack_label) rec.header.flag = label_width; + if (pack_label) + rec.header.flag = label_width; rec.SaveHeader(&blob); if (pack_label) { size_t bsize = blob.size(); - blob.resize(bsize + label_buf.size()*sizeof(float)); - memcpy(BeginPtr(blob) + bsize, - BeginPtr(label_buf), label_buf.size()*sizeof(float)); + blob.resize(bsize + label_buf.size() * sizeof(float)); + memcpy(BeginPtr(blob) + bsize, BeginPtr(label_buf), label_buf.size() * sizeof(float)); } CHECK(std::getline(is, fname)); // eliminate invalid chars in the end - while (fname.length() != 0 && - (isspace(*fname.rbegin()) || !isprint(*fname.rbegin()))) { + while (fname.length() != 0 && (isspace(*fname.rbegin()) || !isprint(*fname.rbegin()))) { fname.resize(fname.length() - 1); } // eliminate invalid chars in beginning. 
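// Record layout assembled into `blob` by this loop before writer.WriteRecord:
//   [ImageRecordIO header | label_width floats when pack_label is set | image bytes]
// where the image bytes are the re-encoded cv::Mat when unchanged != 1, or the
// raw file contents from decode_buf when unchanged == 1.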
- const char *p = fname.c_str(); - while (isspace(*p)) ++p; + const char* p = fname.c_str(); + while (isspace(*p)) + ++p; path = root + p; // use "r" is equal to rb in dmlc::Stream - dmlc::Stream *fi = dmlc::Stream::Create(path.c_str(), "r"); + dmlc::Stream* fi = dmlc::Stream::Create(path.c_str(), "r"); decode_buf.clear(); size_t imsize = 0; while (true) { @@ -239,11 +266,11 @@ int main(int argc, char *argv[]) { size_t nread = fi->Read(BeginPtr(decode_buf) + imsize, kBufferSize); imsize += nread; decode_buf.resize(imsize); - if (nread != kBufferSize) break; + if (nread != kBufferSize) + break; } delete fi; - if (unchanged != 1) { cv::Mat img = cv::imdecode(decode_buf, color_mode); CHECK(img.data != nullptr) << "OpenCV decode fail:" << path; @@ -251,28 +278,40 @@ int main(int argc, char *argv[]) { if (new_size > 0) { if (center_crop) { if (img.rows > img.cols) { - int margin = (img.rows - img.cols)/2; - img = img(cv::Range(margin, margin+img.cols), cv::Range(0, img.cols)); + int margin = (img.rows - img.cols) / 2; + img = img(cv::Range(margin, margin + img.cols), cv::Range(0, img.cols)); } else { - int margin = (img.cols - img.rows)/2; - img = img(cv::Range(0, img.rows), cv::Range(margin, margin + img.rows)); + int margin = (img.cols - img.rows) / 2; + img = img(cv::Range(0, img.rows), cv::Range(margin, margin + img.rows)); } } int interpolation_method = 1; if (img.rows > img.cols) { - if (img.cols != new_size) { - interpolation_method = GetInterMethod(inter_method, img.cols, img.rows, new_size, img.rows * new_size / img.cols, prnd); - cv::resize(img, res, cv::Size(new_size, img.rows * new_size / img.cols), 0, 0, interpolation_method); - } else { - res = img.clone(); - } + if (img.cols != new_size) { + interpolation_method = GetInterMethod( + inter_method, img.cols, img.rows, new_size, img.rows * new_size / img.cols, prnd); + cv::resize(img, + res, + cv::Size(new_size, img.rows * new_size / img.cols), + 0, + 0, + interpolation_method); + } else { + res = img.clone(); + } } else { - if (img.rows != new_size) { - interpolation_method = GetInterMethod(inter_method, img.cols, img.rows, new_size * img.cols / img.rows, new_size, prnd); - cv::resize(img, res, cv::Size(new_size * img.cols / img.rows, new_size), 0, 0, interpolation_method); - } else { - res = img.clone(); - } + if (img.rows != new_size) { + interpolation_method = GetInterMethod( + inter_method, img.cols, img.rows, new_size * img.cols / img.rows, new_size, prnd); + cv::resize(img, + res, + cv::Size(new_size * img.cols / img.rows, new_size), + 0, + 0, + interpolation_method); + } else { + res = img.clone(); + } } } encode_buf.clear(); @@ -281,13 +320,11 @@ int main(int argc, char *argv[]) { // write buffer size_t bsize = blob.size(); blob.resize(bsize + encode_buf.size()); - memcpy(BeginPtr(blob) + bsize, - BeginPtr(encode_buf), encode_buf.size()); + memcpy(BeginPtr(blob) + bsize, BeginPtr(encode_buf), encode_buf.size()); } else { size_t bsize = blob.size(); blob.resize(bsize + decode_buf.size()); - memcpy(BeginPtr(blob) + bsize, - BeginPtr(decode_buf), decode_buf.size()); + memcpy(BeginPtr(blob) + bsize, BeginPtr(decode_buf), decode_buf.size()); } writer.WriteRecord(BeginPtr(blob), blob.size()); // write header From a47156d66767088adf522234bef95572e5f98de9 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Fri, 5 Nov 2021 07:22:27 +0100 Subject: [PATCH 08/10] Clang-format fix --- src/engine/threaded_engine_pooled.cc | 2 +- src/kvstore/kvstore_dist.h | 8 +++--- src/operator/contrib/bilinear_resize-inl.h | 6 ++-- 
src/operator/contrib/bounding_box.cu | 4 +-- src/operator/contrib/multi_lamb.cc | 8 +++--- src/operator/contrib/multi_lans.cc | 8 +++--- src/operator/nn/batch_norm.cu | 8 +++--- src/operator/nn/dnnl/dnnl_base.cc | 12 ++++---- src/operator/nn/dnnl/dnnl_rnn.cc | 28 +++++++++---------- src/operator/nn/softmax-inl.h | 4 +-- src/operator/optimizer_op.cc | 2 +- src/operator/optimizer_op.cu | 2 +- src/operator/subgraph/dnnl/dnnl_conv.cc | 2 +- .../subgraph/tensorrt/onnx_to_tensorrt.h | 2 ++ 14 files changed, 49 insertions(+), 47 deletions(-) diff --git a/src/engine/threaded_engine_pooled.cc b/src/engine/threaded_engine_pooled.cc index fd29f6daacc3..21dc470b708a 100644 --- a/src/engine/threaded_engine_pooled.cc +++ b/src/engine/threaded_engine_pooled.cc @@ -155,7 +155,7 @@ class ThreadedEnginePooled : public ThreadedEngine { bool is_copy = (opr_block->opr->prop == FnProperty::kCopyFromGPU || opr_block->opr->prop == FnProperty::kCopyToGPU); auto&& rctx = is_copy ? streams_->GetIORunContext(opr_block->ctx) : - streams_->GetRunContext(opr_block->ctx); + streams_->GetRunContext(opr_block->ctx); #if MXNET_USE_CUDA CallbackOnStart on_start; CallbackOnComplete callback; diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h index a80176494e1b..27ddb82547a2 100644 --- a/src/kvstore/kvstore_dist.h +++ b/src/kvstore/kvstore_dist.h @@ -508,16 +508,16 @@ class KVStoreDist : public KVStoreLocal { const int dtype = recv_buf.dtype(); const int num_bytes = mshadow::mshadow_sizeof(dtype); PSKV& pskv = (gradient_compression_->get_type() == CompressionType::kNone) ? - EncodeDefaultKey(key, size, num_bytes) : - EncodeCompressedKey(key, size, false, num_bytes); - char* data = static_cast(recv_buf.data().dptr_); + EncodeDefaultKey(key, size, num_bytes) : + EncodeCompressedKey(key, size, false, num_bytes); + char* data = static_cast(recv_buf.data().dptr_); // false means not to delete data when SArray is deleted auto vals = new ps::SArray(data, size * num_bytes, false); // issue pull RequestType mode = (gradient_compression_->get_type() != CompressionType::kNone) ? RequestType::kCompressedPushPull : RequestType::kDefaultPushPull; - const int cmd = GetCommandType(mode, dtype); + const int cmd = GetCommandType(mode, dtype); CHECK_NOTNULL(ps_worker_)->ZPull(pskv.keys, vals, &pskv.lens, cmd, [vals, cb]() { delete vals; cb(); diff --git a/src/operator/contrib/bilinear_resize-inl.h b/src/operator/contrib/bilinear_resize-inl.h index be57acc36ce1..8afb63eff30b 100644 --- a/src/operator/contrib/bilinear_resize-inl.h +++ b/src/operator/contrib/bilinear_resize-inl.h @@ -273,9 +273,9 @@ static bool BilinearSampleOpInferShape(const nnvm::NodeAttrs& attrs, new_height = ((dshape[2] % 2) == 0) ? (int16_t)(dshape[2] * param.scale_height.value()) : (int16_t)((dshape[2] - 1) * param.scale_height.value()) + 1; - new_width = ((dshape[3] % 2) == 0) ? - (int16_t)(dshape[3] * param.scale_width.value()) : - (int16_t)((dshape[3] - 1) * param.scale_width.value()) + 1; + new_width = ((dshape[3] % 2) == 0) ? 
+ (int16_t)(dshape[3] * param.scale_width.value()) : + (int16_t)((dshape[3] - 1) * param.scale_width.value()) + 1; break; } case bilinear_resize::like: { diff --git a/src/operator/contrib/bounding_box.cu b/src/operator/contrib/bounding_box.cu index ef2b7be50a37..e39e69c6fbbc 100644 --- a/src/operator/contrib/bounding_box.cu +++ b/src/operator/contrib/bounding_box.cu @@ -490,8 +490,8 @@ __launch_bounds__(NMS::THRESHOLD) __global__ for (int i = 0; i < n_threads / warp_size; ++i) { uint32_t my_mask = my_next_mask; my_next_mask = (((i + 1) < n_threads / warp_size) && (my_element_in_batch < topk)) ? - nms_results[(i + 1) * topk * num_batches + my_element] : - full_mask; + nms_results[(i + 1) * topk * num_batches + my_element] : + full_mask; if (my_warp == i && !__all_sync(full_mask, my_mask == full_mask)) { my_mask = my_mask | earlier_threads_mask; // Loop over warp_size - 1 because the last diff --git a/src/operator/contrib/multi_lamb.cc b/src/operator/contrib/multi_lamb.cc index 866567d6aa21..91920079a77f 100644 --- a/src/operator/contrib/multi_lamb.cc +++ b/src/operator/contrib/multi_lamb.cc @@ -44,8 +44,8 @@ struct MultiLAMBKernelStep1 { using namespace mshadow_op; for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : - MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); MPDType scaled_grad = static_cast<MPDType>(kernel_params.grads[index][i]) * rescale_grad; if (clip_gradient >= 0.0f) scaled_grad = mshadow_op::clip::Map(scaled_grad, static_cast<MPDType>(clip_gradient)); @@ -93,8 +93,8 @@ struct MultiLAMBKernelStep2 { if ((size_t)i < kernel_params.sizes[index]) { MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : MPDType(kernel_params.weights[index][i]); - float r1 = sqrt(sum_sq_weigths[index]); - float r2 = sqrt(sum_sq_temp_g[index]); + float r1 = sqrt(sum_sq_weigths[index]); + float r2 = sqrt(sum_sq_temp_g[index]); if (lower_bound >= 0) r1 = std::max(r1, lower_bound); if (upper_bound >= 0) diff --git a/src/operator/contrib/multi_lans.cc b/src/operator/contrib/multi_lans.cc index a7bb3ab69a77..4cc88928ff93 100644 --- a/src/operator/contrib/multi_lans.cc +++ b/src/operator/contrib/multi_lans.cc @@ -45,8 +45,8 @@ struct MultiLANSKernelStep1 { using namespace mshadow_op; for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : - MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); float g_norm = sqrt(g_sq_norm[index]); MPDType scaled_grad = static_cast<MPDType>(kernel_params.grads[index][i]) * rescale_grad; scaled_grad /= g_norm; @@ -95,8 +95,8 @@ struct MultiLANSKernelStep2 { const OpReqType req) { for (size_t index = 0; index < kernel_params.ntensors; ++index) { if ((size_t)i < kernel_params.sizes[index]) { - MPDType w = has_mixed_precision ? kernel_params.weights32[index][i] : - MPDType(kernel_params.weights[index][i]); + MPDType w = has_mixed_precision ? 
kernel_params.weights32[index][i] : + MPDType(kernel_params.weights[index][i]); float r1 = sqrt(sum_sq_weigths[index]); float r2_m = sqrt(sum_sq_temp_m[index]); float r2_g = sqrt(sum_sq_temp_g[index]); diff --git a/src/operator/nn/batch_norm.cu b/src/operator/nn/batch_norm.cu index 29f3f61b6808..6ff71aae18bd 100644 --- a/src/operator/nn/batch_norm.cu +++ b/src/operator/nn/batch_norm.cu @@ -282,7 +282,7 @@ __launch_bounds__(inference_forward_threads) __global__ AType invstd = small_num_channels ? saved_invstd[my_channel] : variance_to_invstd(runningVar[my_channel], epsilon); - AType mean = small_num_channels ? saved_mean[my_channel] : runningMean[my_channel]; + AType mean = small_num_channels ? saved_mean[my_channel] : runningMean[my_channel]; AType gamma = small_num_channels ? saved_weight[my_channel] : @@ -349,8 +349,8 @@ __global__ void BatchNormalizationUpdateOutputKernel(DeviceTensor input, const AccReal gamma = ((flags & FIX_GAMMA_FLAG) == 0 && weight.numElements() > 0) ? ScalarConvert<DType, AccReal>::to(weight[plane]) : ScalarConvert<int, AccReal>::to(1); - const AccReal beta = bias.numElements() > 0 ? ScalarConvert<DType, AccReal>::to(bias[plane]) : - ScalarConvert<int, AccReal>::to(0); + const AccReal beta = bias.numElements() > 0 ? ScalarConvert<DType, AccReal>::to(bias[plane]) : + ScalarConvert<int, AccReal>::to(0); for (int batch = 0, nbatch = input.OuterSize(); batch < nbatch; ++batch) { for (int x = threadIdx.x, nx = input.InnerSize(); x < nx; x += blockDim.x) { const DType inp = input.get_ref(batch, plane, x); @@ -651,7 +651,7 @@ static __global__ void BatchNormalizationBackwardKernel(const DeviceTensor input const AccReal weightVal = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) ? ScalarConvert<DType, AccReal>::to(tensors.weight[plane]) : AccReal(1); - const AccReal norm = AccReal(1) / N; + const AccReal norm = AccReal(1) / N; // Compute two values across (batch, x/y/z) in one pass: // 1. Sum(gradOutput) diff --git a/src/operator/nn/dnnl/dnnl_base.cc b/src/operator/nn/dnnl/dnnl_base.cc index 54af44c80fe4..adcd8f2751d9 100644 --- a/src/operator/nn/dnnl/dnnl_base.cc +++ b/src/operator/nn/dnnl/dnnl_base.cc @@ -242,19 +242,19 @@ const dnnl::memory* GetWeights(const NDArray& arr, int num_groups) { tz = dnnl::memory::dims{arr.shape()[O], arr.shape()[I]}; format_tag = dnnl::memory::format_tag::oi; } else if (ndim == 3) { - tz = num_groups > 1 ? - dnnl::memory::dims{ num_groups, arr.shape()[O] / num_groups, arr.shape()[I], arr.shape()[H]} : - dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H]}; + tz = num_groups > 1 ? + dnnl::memory::dims{ num_groups, arr.shape()[O] / num_groups, arr.shape()[I], arr.shape()[H]} : + dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H]}; format_tag = num_groups > 1 ? dnnl::memory::format_tag::goiw : dnnl::memory::format_tag::oiw; } else if (ndim == 4) { - tz = num_groups > 1 ? - dnnl::memory::dims{num_groups, arr.shape()[O] / num_groups, arr.shape()[I], arr.shape()[H], arr.shape()[W]} : - dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H], arr.shape()[W]}; + tz = num_groups > 1 ? + dnnl::memory::dims{num_groups, arr.shape()[O] / num_groups, arr.shape()[I], arr.shape()[H], arr.shape()[W]} : + dnnl::memory::dims{arr.shape()[O], arr.shape()[I], arr.shape()[H], arr.shape()[W]}; format_tag = num_groups > 1 ? dnnl::memory::format_tag::goihw : dnnl::memory::format_tag::oihw; } else if (ndim == 5) { tz = num_groups > 1 ? 
diff --git a/src/operator/nn/dnnl/dnnl_rnn.cc b/src/operator/nn/dnnl/dnnl_rnn.cc index 5ebad89089c3..051de78c7d5d 100644 --- a/src/operator/nn/dnnl/dnnl_rnn.cc +++ b/src/operator/nn/dnnl/dnnl_rnn.cc @@ -197,14 +197,14 @@ RnnPrimitive GetRnnFwdPrim(const DNNLRnnLayerParam& layer_param, auto src_cell_desc = memory::desc(layer_param.cell_dims, data_type, tag::ldnc); auto weight_peep_desc = memory::desc(); auto weight_proj_desc = layer_param.proj_size > 0 ? - memory::desc(layer_param.weight_proj_dims, weight_type, tag::any) : - memory::desc(); - auto dst_state_desc = layer_param.state_outputs ? - memory::desc(layer_param.state_dims, data_type, tag::ldnc) : - memory::desc(); - auto dst_cell_desc = layer_param.state_outputs ? - memory::desc(layer_param.cell_dims, data_type, tag::ldnc) : - memory::desc(); + memory::desc(layer_param.weight_proj_dims, weight_type, tag::any) : + memory::desc(); + auto dst_state_desc = layer_param.state_outputs ? + memory::desc(layer_param.state_dims, data_type, tag::ldnc) : + memory::desc(); + auto dst_cell_desc = layer_param.state_outputs ? + memory::desc(layer_param.cell_dims, data_type, tag::ldnc) : + memory::desc(); auto fwd = RnnPrimitive(); switch (mode) { @@ -266,8 +266,8 @@ RnnBwdPrimitive GetRnnBwdPrim(const DNNLRnnForwardTraining& fwd, memory::data_type weight_type = get_dnnl_type(params.dtype()); const prop_kind prop = prop_kind::backward; rnn_direction dnnl_rnn_direction = layer_param.bidirectional ? - rnn_direction::bidirectional_concat : - rnn_direction::unidirectional; + rnn_direction::bidirectional_concat : + rnn_direction::unidirectional; auto src_layer_desc = memory::desc(layer_param.src_dims, data_type, tag::tnc); auto weight_layer_desc = memory::desc(layer_param.weight_layer_dims, weight_type, tag::any); @@ -276,8 +276,8 @@ RnnBwdPrimitive GetRnnBwdPrim(const DNNLRnnForwardTraining& fwd, auto dst_layer_desc = memory::desc(layer_param.dst_dims, data_type, tag::tnc); auto src_state_desc = memory::desc(layer_param.state_dims, data_type, tag::ldnc); auto dst_state_desc = layer_param.state_outputs ? - memory::desc(layer_param.state_dims, data_type, tag::ldnc) : - memory::desc(); + memory::desc(layer_param.state_dims, data_type, tag::ldnc) : + memory::desc(); const void* fwd_pd = fwd.GetPrimDesc(); auto bwd = RnnBwdPrimitive(); @@ -1127,8 +1127,8 @@ void DNNLRnnOp::Forward(const OpContext& ctx, const int batch_size = default_param.batch_size_; const int state_size = default_param.state_size; const int iter_size = default_param.projection_size.has_value() ? - default_param.projection_size.value() : - default_param.state_size; + default_param.projection_size.value() : + default_param.state_size; const int directions = default_param.bidirectional ? 2 : 1; dnnl::memory::desc dst_desc({seq_length, batch_size, directions * iter_size}, get_dnnl_type(data_dtype), diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h index 9ee41cb8f9a6..71c205539efd 100644 --- a/src/operator/nn/softmax-inl.h +++ b/src/operator/nn/softmax-inl.h @@ -853,8 +853,8 @@ __global__ void masked_softmax_grad_kernel(OType* out, for (index_t i = x; i < M; i += x_size) { bool mask_value = bcst_mask_axis ? in_mask[base_mask] : in_mask[base_mask + i * sa_mask]; final_result = negate ? -OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum) : - OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum); - final_result = mask_value ? 
final_result / static_cast<DType>(temperature) : DType(0.0f); + OP2::Map(ograd[base + i * sa], out[base + i * sa], ssum); + final_result = mask_value ? final_result / static_cast<DType>(temperature) : DType(0.0f); KERNEL_ASSIGN(igrad[base + i * sa], Req, final_result); } } diff --git a/src/operator/optimizer_op.cc b/src/operator/optimizer_op.cc index c3fd47dadd17..ff5f4dd9f355 100644 --- a/src/operator/optimizer_op.cc +++ b/src/operator/optimizer_op.cc @@ -229,7 +229,7 @@ struct AdamStdDnsRspDnsKernel { for (index_t j = 0; j < row_length; j++) { const index_t data_i = row_i + j; DType grad_rescaled = non_zero ? static_cast<DType>(grad_data[grad_i + j] * rescale_grad) : - static_cast<DType>(0); + static_cast<DType>(0); if (clip_gradient >= 0.0f) { grad_rescaled = clip::Map(grad_rescaled, clip_gradient); } diff --git a/src/operator/optimizer_op.cu b/src/operator/optimizer_op.cu index 4c75eb0c72fc..01bd6f8ff1a0 100644 --- a/src/operator/optimizer_op.cu +++ b/src/operator/optimizer_op.cu @@ -164,7 +164,7 @@ struct AdamStdDnsRspDnsKernel { (row_id == 0) ? prefix_sum[0] > 0 : prefix_sum[row_id] > prefix_sum[row_id - 1]; const RType grad_offset = (prefix_sum[row_id] - 1) * row_length + col_id; DType grad_rescaled = non_zero ? static_cast<DType>(grad_data[grad_offset] * rescale_grad) : - static_cast<DType>(0); + static_cast<DType>(0); if (clip_gradient >= 0.0f) { grad_rescaled = clip::Map(grad_rescaled, clip_gradient); } diff --git a/src/operator/subgraph/dnnl/dnnl_conv.cc b/src/operator/subgraph/dnnl/dnnl_conv.cc index e9fab47e6f44..bc1f6fdc5aa5 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv.cc +++ b/src/operator/subgraph/dnnl/dnnl_conv.cc @@ -472,7 +472,7 @@ static void SgDNNLConvParamParser(nnvm::NodeAttrs* attrs) { auto& post_act_param = (param_.full_conv_param.dnnl_param.with_act && !with_act) ? param_.full_conv_param.act_param : param_.full_conv_param.postsum_act_param; - with_act = true; + with_act = true; if (node_name == "Activation") { const auto act_param = nnvm::get<ActivationParam>(node->attrs.parsed); post_act_param.alg = GetDNNLActAlgo(act_param); diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h index c145273076b2..834b20a44165 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h @@ -73,11 +73,13 @@ class TRT_Logger : public nvinfer1::ILogger { time_t rawtime = std::time(0); char buf[256]; strftime(&buf[0], 256, "%Y-%m-%d %H:%M:%S", std::gmtime(&rawtime)); + // clang-format off const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : severity == Severity::kERROR ? " ERROR" : severity == Severity::kWARNING ? "WARNING" : severity == Severity::kINFO ? 
" INFO" : "UNKNOWN"); + // clang-format on (*_ostream) << "[" << buf << " " << sevstr << "] " << msg << std::endl; } } From a9fcaf318820e2705dd2267561c4f1687d2aaf49 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Fri, 5 Nov 2021 07:42:24 +0100 Subject: [PATCH 09/10] Sanity-cpp fix --- cpp-package/include/mxnet-cpp/contrib.h | 4 ++-- cpp-package/include/mxnet-cpp/initializer.h | 4 +++- include/mxnet/operator.h | 4 ++-- plugin/torch/torch_module-inl.h | 2 +- src/operator/contrib/adamw.cu | 4 +++- src/operator/tensor/square_sum.cc | 4 +++- src/operator/tensor/square_sum.cu | 4 +++- tests/cpp/include/test_perf.h | 1 - tests/cpp/include/test_util.h | 1 - tests/cpp/operator/batchnorm_test.cc | 2 -- 10 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp-package/include/mxnet-cpp/contrib.h b/cpp-package/include/mxnet-cpp/contrib.h index c6ca3b834b14..b754ab5e5725 100644 --- a/cpp-package/include/mxnet-cpp/contrib.h +++ b/cpp-package/include/mxnet-cpp/contrib.h @@ -59,10 +59,10 @@ namespace contrib { // needs to be same with // https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190 -static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names"; +static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names"; // NOLINT // needs to be same with // https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244 -static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_"; +static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_"; // NOLINT /*! * this is a mimic to * https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37 diff --git a/cpp-package/include/mxnet-cpp/initializer.h b/cpp-package/include/mxnet-cpp/initializer.h index 5f509c2aa5a0..78ed2dfdecc8 100644 --- a/cpp-package/include/mxnet-cpp/initializer.h +++ b/cpp-package/include/mxnet-cpp/initializer.h @@ -197,7 +197,9 @@ class Xavier : public Initializer { enum RandType { gaussian, uniform } rand_type; enum FactorType { avg, in, out } factor_type; float magnitude; - Xavier(RandType rand_type = gaussian, FactorType factor_type = avg, float magnitude = 3) + Xavier(RandType rand_type = gaussian, // NOLINT + FactorType factor_type = avg, // NOLINT + float magnitude = 3) // NOLINT : rand_type(rand_type), factor_type(factor_type), magnitude(magnitude) {} void operator()(const std::string& name, NDArray* arr) override { diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 268460fd7c25..a5ab13945899 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -109,8 +109,8 @@ class Operator { LOG(FATAL) << "Backward is not implemented"; } /*! 
\return [Deprecated] execution type of the operator */ - virtual ExecType exec_type() - const final { // NOLINT(*) exec_type has been moved to OperatorProperty + virtual ExecType exec_type() // NOLINT(*) exec_type has been moved to OperatorProperty + const final { // NOLINT(*) exec_type has been moved to OperatorProperty return ExecType::kSync; } }; diff --git a/plugin/torch/torch_module-inl.h b/plugin/torch/torch_module-inl.h index ef13493ba56b..57406811b484 100644 --- a/plugin/torch/torch_module-inl.h +++ b/plugin/torch/torch_module-inl.h @@ -208,7 +208,7 @@ class TorchModuleOp : public Operator { // iterate the grad of params lua_pushnil(L); it = in_grad.begin() + param_.num_data; - ; + while (lua_next(L, -2)) { TorchTensor::SetInternal( torchState_, diff --git a/src/operator/contrib/adamw.cu b/src/operator/contrib/adamw.cu index b67ea10e26a3..802378839bc2 100644 --- a/src/operator/contrib/adamw.cu +++ b/src/operator/contrib/adamw.cu @@ -28,8 +28,10 @@ namespace mxnet { namespace op { namespace adamw { +// clang-format off template <> -void GetScaleFloat<gpu>(mshadow::Stream<gpu>* s, const TBlob& scale_blob, float* pScalef){ +void GetScaleFloat<gpu>(mshadow::Stream<gpu>* s, const TBlob& scale_blob, float* pScalef) { + // clang-format on MSHADOW_REAL_TYPE_SWITCH( scale_blob.type_flag_, DType, diff --git a/src/operator/tensor/square_sum.cc b/src/operator/tensor/square_sum.cc index 05917b6c1382..6efef0af1266 100644 --- a/src/operator/tensor/square_sum.cc +++ b/src/operator/tensor/square_sum.cc @@ -26,8 +26,10 @@ namespace mxnet { namespace op { +// clang-format off template <> -void CheckSameIdx<cpu>(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx){ +void CheckSameIdx<cpu>(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { + // clang-format on MSHADOW_IDX_TYPE_SWITCH(ograd_row_idx.type_flag_, IType, { diff --git a/src/operator/tensor/square_sum.cu b/src/operator/tensor/square_sum.cu index d41f0aa02918..1cb27c1b9f0c 100644 --- a/src/operator/tensor/square_sum.cu +++ b/src/operator/tensor/square_sum.cu @@ -26,8 +26,10 @@ namespace mxnet { namespace op { +// clang-format off template <> -void CheckSameIdx<gpu>(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx){ +void CheckSameIdx<gpu>(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { +// clang-format on MSHADOW_IDX_TYPE_SWITCH(ograd_row_idx.type_flag_, IType, { diff --git a/tests/cpp/include/test_perf.h b/tests/cpp/include/test_perf.h index 2f215b5f68ee..94902f71a5f6 100644 --- a/tests/cpp/include/test_perf.h +++ b/tests/cpp/include/test_perf.h @@ -125,7 +125,6 @@ class TimedScope { inline void stop() { stopTime_ = getMicroTickCount(); - ; } inline float elapsedMilliseconds() const { diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index 9b495388955c..48e3971a88be 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -444,7 +444,6 @@ inline StreamType& print_blob_(const RunContext& ctx, break; } else { os << " |" << std::flush; - ; } } if (r < height - 1) { diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index 39d039c0b55c..55b7f421ce79 100644 --- a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -950,8 +950,6 @@ static void timingTest(const std::string& label, timing += info.executor_->GetTiming(); } } - while (false) - ; timing.print(&std::cout, label); std::cout << std::endl << std::flush; From ff48249586aaabda429a4ccf21e08fd9667af4f2 Mon Sep 17 
00:00:00 2001 From: mozga-intel Date: Fri, 5 Nov 2021 08:25:18 +0100 Subject: [PATCH 10/10] Sanity-cpp fix part2 --- src/operator/tensor/square_sum.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/tensor/square_sum.cu b/src/operator/tensor/square_sum.cu index 1cb27c1b9f0c..e27e62d03e2a 100644 --- a/src/operator/tensor/square_sum.cu +++ b/src/operator/tensor/square_sum.cu @@ -29,7 +29,7 @@ namespace op { // clang-format off template <> void CheckSameIdx<gpu>(const OpContext& ctx, const TBlob& ograd_row_idx, const TBlob& in_row_idx) { -// clang-format on + // clang-format on MSHADOW_IDX_TYPE_SWITCH(ograd_row_idx.type_flag_, IType, {