From a63bae9efc51991a0dbf5c9e75138605cd0b5682 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 11 Feb 2020 19:09:45 +0000 Subject: [PATCH 01/26] Added enum for sparse storage --- include/mxnet/lib_api.h | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 21f5cea125e4..5ffaf4f4202c 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -214,6 +214,18 @@ enum MXDType { kUNSET = 100, }; +/* + * MXTensor storage type. + */ +enum MXStorageType { + // dense + kDefaultStorage = 0, + // row sparse + kRowSparseStorage = 1, + // csr + kCSRStorage = 2, +}; + /*! * \brief Context info passing from MXNet OpContext * dev_type is string repr of supported context, currently only "cpu" and "gpu" @@ -233,16 +245,18 @@ enum MXReturnValue { * \brief Tensor data structure used by custom operator */ struct MXTensor { - MXTensor() : data_ptr(NULL), dtype(kUNSET), verID(0) {} + MXTensor() : data_ptr(NULL), dtype(kUNSET), verID(0), stype(kDefaultStorage) {} + // Construtor for dense. MXTensor(void *data_ptr, const std::vector &shape, MXDType dtype, - size_t vID, MXContext mx_ctx) - : data_ptr(data_ptr), shape(shape), dtype(dtype), verID(vID), ctx(mx_ctx) {} + size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage) + : data_ptr(data_ptr), shape(shape), dtype(dtype), verID(vID), ctx(mx_ctx), stype(stype) {} /*! \brief populate internal tensor fields */ + // To do: solve for CSR and row sparse. void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, - size_t vID, MXContext mx_ctx) { - data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; + size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage) { + data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = stype; shape.clear(); for (int j = 0; j < ndims; j++) { shape.push_back(dims[j]); @@ -335,11 +349,15 @@ struct MXTensor { verID == oth.verID && ctx.dev_type == oth.ctx.dev_type && ctx.dev_id == oth.ctx.dev_id && - shape == oth.shape; + shape == oth.shape && + stype == oth.stype; } - // data is flatten 1D repr of tensor, elements are in continuous memory - // user can access each element using the shape of tensor + /*! \brief get MXTensors storage type*/ + inline MXStorageType getStorageType() { return stype; } + + // For dense, data_ptr points to ChunkDense. + // For sparse, data_ptr points to ChunkSparse. void *data_ptr; // shape is in [2,3,4] format to represent high-dim tensor @@ -357,6 +375,9 @@ struct MXTensor { // corresponding DLTensor repr of MXTensor // easy way to reuse functions taking DLTensor DLTensor dltensor; + + // storage type + MXStorageType stype; }; /*! \brief resource malloc function to allocate memory inside Forward/Backward functions */ From 93cddf44cec8f027892d590bda9eae1630123f4f Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 11 Feb 2020 19:13:54 +0000 Subject: [PATCH 02/26] Add structure for Dense and Sparse --- include/mxnet/lib_api.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 5ffaf4f4202c..8d6ff20156e6 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -241,11 +241,43 @@ enum MXReturnValue { MX_SUCCESS = 1, }; +struct ChunkDense { + // Pointer to data. + void *data{nullptr}; + // Size of data in bytes. + size_t dataSize{0}; + // shape of data. + std::vector shape; + // Context of data. 
+ // MXContext ctx; +}; + +struct ChunkSparse { + // Pointer to data. + void *data{nullptr}; + // Size of data in bytes. + size_t dataSize{0}; + // length of data. + int64_t data_lens; + + // To store aux data for sparse. + // for row_sparse, aux_data[0] = indices + // for csr, aux_data[0] = indptr, aux_data[1] = indices + std::vector> aux_data; + + // Lens of the aux_data. + // for row_sparse, aux_lens[0] = len(indices) + // for csr, aux_lens[0] = len(indptr), aux_lens[1] = len(indices) + std::vector aux_lens; + // Context of data. + // MXContext ctx; +}; + /*! * \brief Tensor data structure used by custom operator */ struct MXTensor { - MXTensor() : data_ptr(NULL), dtype(kUNSET), verID(0), stype(kDefaultStorage) {} + MXTensor() : data_ptr(nullptr), dtype(kUNSET), verID(0), stype(kDefaultStorage) {} // Construtor for dense. MXTensor(void *data_ptr, const std::vector &shape, MXDType dtype, @@ -350,7 +382,7 @@ struct MXTensor { ctx.dev_type == oth.ctx.dev_type && ctx.dev_id == oth.ctx.dev_id && shape == oth.shape && - stype == oth.stype; + stype == oth.stype; } /*! \brief get MXTensors storage type*/ From 8ccfbd2e9b8ef83a91ecbbefc2f4d73c8f48d96a Mon Sep 17 00:00:00 2001 From: guanxinq Date: Thu, 13 Feb 2020 23:07:41 +0000 Subject: [PATCH 03/26] redesign the data structure for MXSparse --- include/mxnet/lib_api.h | 48 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 8d6ff20156e6..5d574d1812b4 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -39,7 +39,7 @@ #include #include -#define MX_LIBRARY_VERSION 3 +#define MX_LIBRARY_VERSION 4 /*! * \brief For loading multiple custom op libraries in Linux, exporting same symbol multiple @@ -241,34 +241,21 @@ enum MXReturnValue { MX_SUCCESS = 1, }; -struct ChunkDense { +struct MXSparse { // Pointer to data. void *data{nullptr}; - // Size of data in bytes. - size_t dataSize{0}; - // shape of data. - std::vector shape; - // Context of data. - // MXContext ctx; -}; - -struct ChunkSparse { - // Pointer to data. - void *data{nullptr}; - // Size of data in bytes. - size_t dataSize{0}; - // length of data. - int64_t data_lens; + // length of (non-zero) data. + int64_t data_len; // To store aux data for sparse. - // for row_sparse, aux_data[0] = indices - // for csr, aux_data[0] = indptr, aux_data[1] = indices - std::vector> aux_data; - - // Lens of the aux_data. - // for row_sparse, aux_lens[0] = len(indices) - // for csr, aux_lens[0] = len(indptr), aux_lens[1] = len(indices) - std::vector aux_lens; + // For CSR, indices stores the col index of non-zero values. + // For row sparse, indices store row index of rows which have non-zero values. + std::vector indices; + + // For CSR, indptr gives the start and end index of data for each row. + // For row sparse, indptr is empty. + std::vector indptr; + // Context of data. // MXContext ctx; }; @@ -281,13 +268,13 @@ struct MXTensor { // Construtor for dense. MXTensor(void *data_ptr, const std::vector &shape, MXDType dtype, - size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage) + size_t vID, MXContext mx_ctx, MXStorageType stype) : data_ptr(data_ptr), shape(shape), dtype(dtype), verID(vID), ctx(mx_ctx), stype(stype) {} /*! \brief populate internal tensor fields */ // To do: solve for CSR and row sparse. 
void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, - size_t vID, MXContext mx_ctx, MXStorageType stype = kDefaultStorage) { + size_t vID, MXContext mx_ctx, MXStorageType stype) { data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = stype; shape.clear(); for (int j = 0; j < ndims; j++) { @@ -385,11 +372,8 @@ struct MXTensor { stype == oth.stype; } - /*! \brief get MXTensors storage type*/ - inline MXStorageType getStorageType() { return stype; } - - // For dense, data_ptr points to ChunkDense. - // For sparse, data_ptr points to ChunkSparse. + // For dense, data_ptr points to data. + // For sparse, data_ptr points to MXSparse. void *data_ptr; // shape is in [2,3,4] format to represent high-dim tensor From 8c9b358468a2c676826348633706fac59028f993 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Fri, 14 Feb 2020 23:24:15 +0000 Subject: [PATCH 04/26] pull out aux data from sparse NDArray --- .../extensions/lib_subgraph/subgraph_lib.cc | 4 +-- include/mxnet/lib_api.h | 8 ++--- src/c_api/c_api.cc | 31 ++++++++++++++++++- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 0727eb786ad8..9c2b215f4c6c 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -84,7 +84,7 @@ MXReturnValue myExecutor(std::vector inputs, // get input tensor based on node ID inputs from data storage MXTensor &input = data[node_inputs.list[0].list[0].num]; // create temporary storage - MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, {"cpu", 0}); + MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, {"cpu", 0}, kDefaultStorage); // save allocated ptr to free later to_free.push_back(tmp.data_ptr); // execute log operator @@ -95,7 +95,7 @@ MXReturnValue myExecutor(std::vector inputs, // get input tensor based on node ID inputs from data storage MXTensor &input = data[node_inputs.list[0].list[0].num]; // create temporary storage - MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, {"cpu", 0}); + MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, {"cpu", 0}, kDefaultStorage); // save allocated ptr to free later to_free.push_back(tmp.data_ptr); // execute exp operator diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 5d574d1812b4..87eb62f42b5e 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -1150,14 +1150,14 @@ extern "C" { std::vector inputs(num_in); for (int i = 0; i < num_in; i++) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}); + inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // create a vector of tensors for outputs std::vector outputs(num_out); for (int i = 0; i < num_out; i++) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}); + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream); @@ -1235,14 +1235,14 @@ extern "C" { std::vector inputs(num_in); for (int i = 0; i < num_in; i++) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}); + inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // create a vector of tensors for outputs std::vector outputs(num_out); for (int i = 0; i < num_out; i++) { 
outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}); + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f140b58d7eb7..a49bae51a705 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -114,13 +114,19 @@ void CustomFComputeDispatcher(const std::string op_name, const std::vector& req, const std::vector& outputs) { std::vector in_data, out_data; - std::vector in_shapes, out_shapes; + std::vector in_shapes, out_shapes; std::vector in_dims, out_dims; std::vector in_types, out_types; std::vector in_verIDs, out_verIDs; std::vector in_dev_type, out_dev_type; std::vector in_dev_id, out_dev_id; + // Aux data for sparse representation. + std::vector in_indices, out_indices; + std::vector in_indptr, out_indptr; + std::vector in_indices_shapes, out_indices_shapes; + std::vector in_indptr_shapes, out_indptr_shapes; + // convert inputs/outpus NDArray to C types to be passed to lib_api.h for (size_t i = 0; i < inputs.size(); i++) { in_data.push_back(inputs[i].data().dptr_); @@ -131,6 +137,17 @@ void CustomFComputeDispatcher(const std::string op_name, const char* ctx_str = inputs[i].ctx().dev_mask() == Context::kCPU ? "cpu" : "gpu"; in_dev_type.push_back(ctx_str); in_dev_id.push_back(inputs[i].ctx().real_dev_id()); + + if(inputs[i].storage_type() == mxnet::kRowSparseStorage) { + in_indices.push_back(inputs[i].aux_data(rowsparse::kIdx).dptr_); + in_indices_shapes.push_back(inputs[i].aux_shape(rowsparse::kIdx).Size()); + } + else if(inputs[i].storage_type() == mxnet::kCSRStorage) { + in_indices.push_back(inputs[i].aux_data(csr::kIdx).dptr_); + in_indptr.push_back(inputs[i].aux_data(csr::kIndPtr).dptr_); + in_indices_shapes.push_back(inputs[i].aux_shape(rowsparse::kIdx).Size()); + in_indptr_shapes.push_back(inputs[i].aux_shape(csr::kIndPtr).Size()); + } } for (size_t i = 0; i < outputs.size(); i++) { @@ -142,6 +159,18 @@ void CustomFComputeDispatcher(const std::string op_name, const char* ctx_str = outputs[i].ctx().dev_mask() == Context::kCPU ? "cpu" : "gpu"; out_dev_type.push_back(ctx_str); out_dev_id.push_back(outputs[i].ctx().real_dev_id()); + + // To do: Find ways to handle out dimenson is unknown for sparse. 
+ if(outputs[i].storage_type() == mxnet::kRowSparseStorage) { + out_indices.push_back(outputs[i].aux_data(rowsparse::kIdx).dptr_); + out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); + } + else if(outputs[i].storage_type() == mxnet::kCSRStorage) { + out_indices.push_back(outputs[i].aux_data(csr::kIdx).dptr_); + out_indptr.push_back(outputs[i].aux_data(csr::kIndPtr).dptr_); + out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); + out_indptr_shapes.push_back(outputs[i].aux_shape(csr::kIndPtr).Size()); + } } // get memory resource and mxnet backend streams From 2bf9200742fa825d6eb3854dcc9d32dcde63873e Mon Sep 17 00:00:00 2001 From: guanxinq Date: Sat, 15 Feb 2020 00:57:28 +0000 Subject: [PATCH 05/26] Added more sparse arguments to API interface --- include/mxnet/lib_api.h | 16 ++++++++++++---- src/c_api/c_api.cc | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 87eb62f42b5e..fa5116ffa761 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -911,7 +911,9 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream); + xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, + int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, @@ -934,7 +936,9 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream); + xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, + int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1139,7 +1143,9 @@ extern "C" { const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream) { + xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, + int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list std::map attrs; for (int i = 0; i < num; i++) { @@ -1230,7 +1236,9 @@ extern "C" { const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream) { + xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, + int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create a vector of tensors for inputs std::vector inputs(num_in); for (int i = 0; i < num_in; i++) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index a49bae51a705..c90a49560cf3 
100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -227,12 +227,18 @@ void CustomFComputeDispatcher(const std::string op_name, attr_vals.push_back(kv.second.c_str()); } // call fcompute function + // std::vector in_indices, out_indices; + std::vector in_indptr, out_indptr; + std::vector in_indices_shapes, out_indices_shapes; + std::vector in_indptr_shapes, out_indptr_shapes; CHECK(callFComp(fcomp_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), - cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream)) + cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, + in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), + out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -251,7 +257,9 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), - cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream)) + cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, + in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), + out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; } } From 7eba53c373531624ea111d527e0b17b235177334 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Mon, 17 Feb 2020 22:03:34 +0000 Subject: [PATCH 06/26] Passed sparse from c_api to lib_api.h and set in MXTensor --- include/mxnet/lib_api.h | 90 ++++++++++++++++++++++++++++++++++------- src/c_api/c_api.cc | 15 ++++--- 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index fa5116ffa761..c6dd04a1b691 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -256,8 +256,16 @@ struct MXSparse { // For row sparse, indptr is empty. std::vector indptr; - // Context of data. - // MXContext ctx; + void set(void *Data, const int64_t* Dims, int NDims, void *Indices, + int64_t IndicesLen, void *IndPtr = nullptr, int64_t IndPtrLen = 0) { + data = Data; + data_len = 1; + for (int i = 0; i < NDims; i++) { + data_len *= Dims[i]; + } + indices.assign((int64_t*)Indices, (int64_t*)Indices + IndicesLen); + if(IndPtr) indptr.assign((int64_t*)IndPtr, (int64_t*)IndPtr + IndPtrLen); + } }; /*! @@ -272,7 +280,6 @@ struct MXTensor { : data_ptr(data_ptr), shape(shape), dtype(dtype), verID(vID), ctx(mx_ctx), stype(stype) {} /*! \brief populate internal tensor fields */ - // To do: solve for CSR and row sparse. 
void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, size_t vID, MXContext mx_ctx, MXStorageType stype) { data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = stype; @@ -912,8 +919,10 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, - int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, @@ -937,8 +946,10 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, - int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1144,8 +1155,9 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, - int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list std::map attrs; for (int i = 0; i < num; i++) { @@ -1154,16 +1166,63 @@ extern "C" { // create a vector of tensors for inputs std::vector inputs(num_in); + for (int i = 0; i < num_in; i++) { - inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); + MXStorageType type; + void *data = nullptr; + MXSparse sparse; + // Dense representation. + if(in_indices_shapes[i] == 0) { + type = kDefaultStorage; + data = indata[i]; + } + // Sparse representation. + else { + // To do: remove if else. + if(in_indptr_shapes[i] == 0) { + type = kRowSparseStorage; + sparse.set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + } + else { + type = kCSRStorage; + sparse.set(indata[i], inshapes[i], indims[i], in_indices[i], + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + } + data = (void*)(&sparse); + } + + inputs[i].setTensor(data, (MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], {indev_type[i], indev_id[i]}, type); } // create a vector of tensors for outputs std::vector outputs(num_out); for (int i = 0; i < num_out; i++) { - outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); + MXStorageType type; + void *data = nullptr; + MXSparse sparse; + // Dense representation. 
+ if(out_indices_shapes[i] == 0) { + type = kDefaultStorage; + data = outdata[i]; + } + // Sparse representation. + else { + // To do: remove if else. + if(out_indptr_shapes[i] == 0) { + type = kRowSparseStorage; + sparse.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); + } + else { + type = kCSRStorage; + sparse.set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); + } + data = (void*)(&sparse); + } + + outputs[i].setTensor(data, (MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], {outdev_type[i], outdev_id[i]}, type); } OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream); @@ -1237,8 +1296,9 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, - int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create a vector of tensors for inputs std::vector inputs(num_in); for (int i = 0; i < num_in; i++) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index c90a49560cf3..799faee0f112 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -226,19 +226,17 @@ void CustomFComputeDispatcher(const std::string op_name, attr_keys.push_back(kv.first.c_str()); attr_vals.push_back(kv.second.c_str()); } + // call fcompute function - // std::vector in_indices, out_indices; - std::vector in_indptr, out_indptr; - std::vector in_indices_shapes, out_indices_shapes; - std::vector in_indptr_shapes, out_indptr_shapes; CHECK(callFComp(fcomp_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), - out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -258,8 +256,9 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), - out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; } } From 3fdf771c2fae03b4a8c3417e00f9fc27cb18a84c Mon Sep 17 00:00:00 2001 From: guanxinq Date: Mon, 17 Feb 2020 22:28:42 +0000 Subject: [PATCH 07/26] Fix indent --- include/mxnet/lib_api.h | 24 
++++++++++++------------ src/c_api/c_api.cc | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index c6dd04a1b691..856714c363eb 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -919,8 +919,8 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); @@ -946,10 +946,10 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1155,9 +1155,9 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list std::map attrs; for (int i = 0; i < num; i++) { @@ -1296,9 +1296,9 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create a vector of tensors for inputs std::vector inputs(num_in); for (int i = 0; i < num_in; i++) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 799faee0f112..aafec1ba904c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -234,9 +234,9 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), - in_indptr_shapes.data(), out_indptr_shapes.data())) + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + 
in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -256,7 +256,7 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; From a1aa78f64d395d02534327072e6695fdf8414c52 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Wed, 19 Feb 2020 00:11:16 +0000 Subject: [PATCH 08/26] fix segfault --- include/mxnet/lib_api.h | 23 +++++++++++------------ src/c_api/c_api.cc | 3 +-- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 856714c363eb..64a01db90fb6 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -256,15 +256,15 @@ struct MXSparse { // For row sparse, indptr is empty. std::vector indptr; - void set(void *Data, const int64_t* Dims, int NDims, void *Indices, - int64_t IndicesLen, void *IndPtr = nullptr, int64_t IndPtrLen = 0) { - data = Data; + void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, + int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { + data = data_ptr; data_len = 1; - for (int i = 0; i < NDims; i++) { - data_len *= Dims[i]; + for (int i = 0; i < ndims; i++) { + data_len *= dims[i]; } - indices.assign((int64_t*)Indices, (int64_t*)Indices + IndicesLen); - if(IndPtr) indptr.assign((int64_t*)IndPtr, (int64_t*)IndPtr + IndPtrLen); + indices.assign((int64_t*)idx, (int64_t*)idx + num_idx); + if(idx_ptr) indptr.assign((int64_t*)idx_ptr, (int64_t*)idx_ptr + num_idx_ptr); } }; @@ -1172,14 +1172,14 @@ extern "C" { void *data = nullptr; MXSparse sparse; // Dense representation. - if(in_indices_shapes[i] == 0) { + if(!in_indices_shapes) { type = kDefaultStorage; data = indata[i]; } // Sparse representation. else { // To do: remove if else. - if(in_indptr_shapes[i] == 0) { + if(!in_indptr_shapes) { type = kRowSparseStorage; sparse.set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } @@ -1190,7 +1190,6 @@ extern "C" { } data = (void*)(&sparse); } - inputs[i].setTensor(data, (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, type); } @@ -1202,14 +1201,14 @@ extern "C" { void *data = nullptr; MXSparse sparse; // Dense representation. - if(out_indices_shapes[i] == 0) { + if(!out_indices_shapes) { type = kDefaultStorage; data = outdata[i]; } // Sparse representation. else { // To do: remove if else. - if(out_indptr_shapes[i] == 0) { + if(!out_indptr_shapes) { type = kRowSparseStorage; sparse.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index aafec1ba904c..d83b2ea209a4 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -121,7 +121,7 @@ void CustomFComputeDispatcher(const std::string op_name, std::vector in_dev_type, out_dev_type; std::vector in_dev_id, out_dev_id; - // Aux data for sparse representation. + // Extra data for sparse representation. 
std::vector in_indices, out_indices; std::vector in_indptr, out_indptr; std::vector in_indices_shapes, out_indices_shapes; @@ -246,7 +246,6 @@ void CustomFComputeDispatcher(const std::string op_name, CustomStatefulOp* state_op_inst = op.get_instance(); CHECK(state_op_inst != nullptr) << "Error custom stateful operator is null for operator '" << op_name << "'"; - // call fcompute function CHECK(callFStatefulComp(stateful_forward_flag, state_op_inst, in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), From 0537deb50ab2bb8c219ba7ff9320a4affba52b06 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 25 Feb 2020 18:35:53 +0000 Subject: [PATCH 09/26] Fix NDArray to MXTensor errors --- include/mxnet/lib_api.h | 34 +++++++++++++++------------------- src/c_api/c_api.cc | 9 +++++---- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 64a01db90fb6..610a4fbce9d8 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -259,10 +259,7 @@ struct MXSparse { void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { data = data_ptr; - data_len = 1; - for (int i = 0; i < ndims; i++) { - data_len *= dims[i]; - } + data_len = num_idx; indices.assign((int64_t*)idx, (int64_t*)idx + num_idx); if(idx_ptr) indptr.assign((int64_t*)idx_ptr, (int64_t*)idx_ptr + num_idx_ptr); } @@ -281,8 +278,8 @@ struct MXTensor { /*! \brief populate internal tensor fields */ void setTensor(void *dptr, MXDType type, const int64_t* dims, int ndims, - size_t vID, MXContext mx_ctx, MXStorageType stype) { - data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = stype; + size_t vID, MXContext mx_ctx, MXStorageType storage_type) { + data_ptr = dptr; dtype = type; verID = vID; ctx = mx_ctx; stype = storage_type; shape.clear(); for (int j = 0; j < ndims; j++) { shape.push_back(dims[j]); @@ -1167,10 +1164,12 @@ extern "C" { // create a vector of tensors for inputs std::vector inputs(num_in); + MXStorageType type; + void *data = nullptr; + void *data2 = nullptr; + MXSparse sparse; + MXSparse sparse2; for (int i = 0; i < num_in; i++) { - MXStorageType type; - void *data = nullptr; - MXSparse sparse; // Dense representation. if(!in_indices_shapes) { type = kDefaultStorage; @@ -1193,34 +1192,31 @@ extern "C" { inputs[i].setTensor(data, (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, type); } - // create a vector of tensors for outputs std::vector outputs(num_out); + for (int i = 0; i < num_out; i++) { - MXStorageType type; - void *data = nullptr; - MXSparse sparse; - // Dense representation. + //MXStorageType type2; if(!out_indices_shapes) { type = kDefaultStorage; - data = outdata[i]; + data2 = outdata[i]; } // Sparse representation. else { // To do: remove if else. 
if(!out_indptr_shapes) { type = kRowSparseStorage; - sparse.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); + sparse2.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } else { type = kCSRStorage; - sparse.set(outdata[i], outshapes[i], outdims[i], out_indices[i], + sparse2.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - data = (void*)(&sparse); + data2 = (void*)(&sparse2); } - outputs[i].setTensor(data, (MXDType)outtypes[i], outshapes[i], outdims[i], + outputs[i].setTensor(data2, (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, type); } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index d83b2ea209a4..19c7ff695003 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -145,7 +145,7 @@ void CustomFComputeDispatcher(const std::string op_name, else if(inputs[i].storage_type() == mxnet::kCSRStorage) { in_indices.push_back(inputs[i].aux_data(csr::kIdx).dptr_); in_indptr.push_back(inputs[i].aux_data(csr::kIndPtr).dptr_); - in_indices_shapes.push_back(inputs[i].aux_shape(rowsparse::kIdx).Size()); + in_indices_shapes.push_back(inputs[i].aux_shape(csr::kIdx).Size()); in_indptr_shapes.push_back(inputs[i].aux_shape(csr::kIndPtr).Size()); } } @@ -246,6 +246,7 @@ void CustomFComputeDispatcher(const std::string op_name, CustomStatefulOp* state_op_inst = op.get_instance(); CHECK(state_op_inst != nullptr) << "Error custom stateful operator is null for operator '" << op_name << "'"; + // call fcompute function CHECK(callFStatefulComp(stateful_forward_flag, state_op_inst, in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), @@ -607,10 +608,10 @@ int MXLoadLib(const char *path) { std::vector* in_stypes, std::vector* out_stypes) { // TODO(ziyimu): remove this dense enforce check after supporting sparse tensor - CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) - << "Error input tensors are not dense for custom operator '" << name_str << "'"; + //CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) + //<< "Error input tensors are not dense for custom operator '" << name_str << "'"; // set outputs as dense - return op::storage_type_assign(out_stypes, mxnet::kDefaultStorage, + return op::storage_type_assign(out_stypes, mxnet::kCSRStorage, dispatch_mode, DispatchMode::kFComputeEx); }; From 4f44695fde3506c99280f94820ccf5255deb1d53 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 25 Feb 2020 18:36:59 +0000 Subject: [PATCH 10/26] Add a sample of sparse(CSR) transpose --- example/extensions/lib_custom_op/Makefile | 7 +- .../lib_custom_op/test_transsparse.py | 46 ++++ .../lib_custom_op/transsparse_lib.cc | 204 ++++++++++++++++++ 3 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 example/extensions/lib_custom_op/test_transsparse.py create mode 100644 example/extensions/lib_custom_op/transsparse_lib.cc diff --git a/example/extensions/lib_custom_op/Makefile b/example/extensions/lib_custom_op/Makefile index edd753b0759c..b4ceafc9fbdf 100644 --- a/example/extensions/lib_custom_op/Makefile +++ b/example/extensions/lib_custom_op/Makefile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-all: gemm_lib relu_lib +all: gemm_lib relu_lib transsparse_lib gemm_lib: g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet @@ -23,5 +23,8 @@ gemm_lib: relu_lib: nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet +transsparse_lib: + g++ -shared -fPIC -std=c++11 transsparse_lib.cc -o libtranssparse_lib.so -I ../../../include/mxnet + clean: - rm -rf libgemm_lib.so librelu_lib.so + rm -rf libgemm_lib.so librelu_lib.so libtranssparse_lib.so diff --git a/example/extensions/lib_custom_op/test_transsparse.py b/example/extensions/lib_custom_op/test_transsparse.py new file mode 100644 index 000000000000..e54f03a56761 --- /dev/null +++ b/example/extensions/lib_custom_op/test_transsparse.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=arguments-differ + +# This test checks dynamic loading of custom library into MXNet +# and checks end to end compute of a simple 2D gemm custom op + +import mxnet as mx +import os + +#load library +if (os.name=='posix'): + path = os.path.abspath('libtranssparse_lib.so') + mx.library.load(path) +elif (os.name=='nt'): + path = os.path.abspath('libtranssparse_lib.dll') + mx.library.load(path) + +a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) +a = a.tostype('csr') +print(type(a)) +print(a.data.asnumpy()) +print(a.indices.asnumpy()) +print(a.indptr.asnumpy()) + +# To do: Fix segment fault. +b = mx.nd.my_transsparse(a) +print("B Type:", type(b)) diff --git a/example/extensions/lib_custom_op/transsparse_lib.cc b/example/extensions/lib_custom_op/transsparse_lib.cc new file mode 100644 index 000000000000..50714edaab5c --- /dev/null +++ b/example/extensions/lib_custom_op/transsparse_lib.cc @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file transsparse_lib.cc + * \brief Sample 2D transpose custom operator. 
+ */ + +#include +#include "lib_api.h" + +void transpose(MXTensor src, MXTensor dst) { + typedef MXSparse DType; + DType* A = src.data(); + DType* B = dst.data(); + + std::vector shape = src.shape; + int64_t h = shape[0]; + int64_t w = shape[1]; + + if(src.stype == kRowSparseStorage) { + //To do: add implementation. + } + else if(src.stype == kCSRStorage) { + // To do: fix type. + float *Aval = (float*) (A->data); + float *Bval = (float*) (B->data); + std::vector rowPtr(w + 2, 0); + // count column + for(int i = 0; i < A->data_len; i++) { + rowPtr[A->indices[i] + 2]++; + } + // Accumulated sum + for(int i = 2; i < rowPtr.size(); i++) { + rowPtr[i] += rowPtr[i - 1]; + } + // Get the dst sparse matrix. + for(int i = 0; i < h; i++) { + for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { + int index = rowPtr[A->indices[j] + 1]++; + Bval[index] = Aval[j]; + B->indices[index] = i; + } + } + B->indptr.assign(rowPtr.begin(), rowPtr.begin() + w + 1); + } + // To do: Remove test. + /* + std::cout << "#### Print B" << std::endl; + float *Bval = (float*) (B->data); + for(int i = 0; i < 9; i++) + std::cout << Bval[i] << " "; + std::cout << std::endl; + for(auto i: B->indices) + std::cout << i << " "; + std::cout << std::endl; + for(auto i: B->indptr) + std::cout << i << " "; + std::cout << std::endl; + */ +} + +MXReturnValue forward(std::map attrs, + std::vector inputs, + std::vector outputs, + OpResource res) { + + // The data types and storage types of inputs and outputs should be the same. + if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) + return MX_FAIL; + + transpose(inputs[0], outputs[0]); + return MX_SUCCESS; +} + +MXReturnValue backward(std::map attrs, + std::vector inputs, + std::vector outputs, + OpResource res) { + return MX_SUCCESS; +} + +MXReturnValue parseAttrs(std::map attrs, int* num_in, int* num_out) { + *num_in = 1; + *num_out = 1; + return MX_SUCCESS; +} + +MXReturnValue inferType(std::map attrs, + std::vector &intypes, + std::vector &outtypes) { + // validate inputs + if (intypes.size() != 1) { + std::cout << "Expected 1 inputs to inferType" << std::endl; + return MX_FAIL; + } + for (unsigned i = 0; i < intypes.size(); i++) { + if (intypes[i] != kFloat32) { + std::cout << "Expected input " << i << " to have float32 type" << std::endl; + return MX_FAIL; + } + } + + outtypes[0] = intypes[0]; + return MX_SUCCESS; +} + +MXReturnValue inferShape(std::map attrs, + std::vector> &inshapes, + std::vector> &outshapes) { + // validate inputs + if (inshapes.size() != 1) { + std::cout << "Expected 1 inputs to inferShape" << std::endl; + return MX_FAIL; + } + + outshapes[0].push_back(inshapes[0][1]); + outshapes[0].push_back(inshapes[0][0]); + return MX_SUCCESS; +} + +REGISTER_OP(my_transsparse) +.setForward(forward, "cpu") +.setBackward(backward, "cpu") +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape); + +/* ------------------------------------------------------------------------- */ + +class MyStatefulTransSparse : public CustomStatefulOp { + public: + explicit MyStatefulTransSparse(int count) : count(count) {} + + MXReturnValue Forward(std::vector inputs, + std::vector outputs, + OpResource op_res) { + std::cout << "Info: keyword + number of forward: " << ++count << std::endl; + std::map attrs; + return forward(attrs, inputs, outputs, op_res); + } + + MXReturnValue Backward(std::vector inputs, + std::vector outputs, + OpResource op_res) { + std::map attrs; + return backward(attrs, inputs, outputs, op_res); + } + + 
~MyStatefulTransSparse() {} + + private: + int count; +}; + +MXReturnValue createOpState(std::map attrs, + CustomStatefulOp** op_inst) { + // testing passing of keyword arguments + int count = attrs.count("test_kw") > 0 ? std::stoi(attrs["test_kw"]) : 0; + // creating stateful operator instance + *op_inst = new MyStatefulTransSparse(count); + std::cout << "Info: stateful operator created" << std::endl; + return MX_SUCCESS; +} + +MXReturnValue mutateInputs(std::map attrs, + std::vector &input_indices) { + // input_indices.push_back(1); // mark mutate input + return MX_SUCCESS; +} + +REGISTER_OP(state_transsparse) +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape) +.setMutateInputs(mutateInputs) +.setCreateOpState(createOpState, "cpu"); + +MXReturnValue initialize(int version) { + if (version >= 10400) { + std::cout << "MXNet version " << version << " supported" << std::endl; + return MX_SUCCESS; + } else { + std::cout << "MXNet version " << version << " not supported" << std::endl; + return MX_FAIL; + } +} From ade3e46db99564c71f270c22e17c7cc62d5ad13e Mon Sep 17 00:00:00 2001 From: guanxinq Date: Wed, 26 Feb 2020 23:41:47 +0000 Subject: [PATCH 11/26] Make CSR transpose temporarily work by hardcoding --- .../lib_custom_op/test_transsparse.py | 3 +++ .../lib_custom_op/transsparse_lib.cc | 16 +--------------- include/mxnet/lib_api.h | 18 +++++++++++++----- src/c_api/c_api.cc | 7 +++++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/example/extensions/lib_custom_op/test_transsparse.py b/example/extensions/lib_custom_op/test_transsparse.py index e54f03a56761..e64ad2468483 100644 --- a/example/extensions/lib_custom_op/test_transsparse.py +++ b/example/extensions/lib_custom_op/test_transsparse.py @@ -44,3 +44,6 @@ # To do: Fix segment fault. b = mx.nd.my_transsparse(a) print("B Type:", type(b)) +print(b.data.asnumpy()) +print(b.indices.asnumpy()) +print(b.indptr.asnumpy()) diff --git a/example/extensions/lib_custom_op/transsparse_lib.cc b/example/extensions/lib_custom_op/transsparse_lib.cc index 50714edaab5c..8ae78c2c32b1 100644 --- a/example/extensions/lib_custom_op/transsparse_lib.cc +++ b/example/extensions/lib_custom_op/transsparse_lib.cc @@ -59,22 +59,8 @@ void transpose(MXTensor src, MXTensor dst) { B->indices[index] = i; } } - B->indptr.assign(rowPtr.begin(), rowPtr.begin() + w + 1); + memcpy(B->indptr, rowPtr.data(), sizeof(int64_t) * (w + 1)); } - // To do: Remove test. - /* - std::cout << "#### Print B" << std::endl; - float *Bval = (float*) (B->data); - for(int i = 0; i < 9; i++) - std::cout << Bval[i] << " "; - std::cout << std::endl; - for(auto i: B->indices) - std::cout << i << " "; - std::cout << std::endl; - for(auto i: B->indptr) - std::cout << i << " "; - std::cout << std::endl; - */ } MXReturnValue forward(std::map attrs, diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 610a4fbce9d8..e0241890addd 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -250,18 +250,26 @@ struct MXSparse { // To store aux data for sparse. // For CSR, indices stores the col index of non-zero values. // For row sparse, indices store row index of rows which have non-zero values. - std::vector indices; + int64_t* indices; + int64_t indices_len; // For CSR, indptr gives the start and end index of data for each row. // For row sparse, indptr is empty. 
- std::vector indptr; + int64_t* indptr; + int64_t indptr_len; void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { data = data_ptr; data_len = num_idx; - indices.assign((int64_t*)idx, (int64_t*)idx + num_idx); - if(idx_ptr) indptr.assign((int64_t*)idx_ptr, (int64_t*)idx_ptr + num_idx_ptr); + + indices = (int64_t*)idx; + indices_len = num_idx; + + if(idx_ptr) { + indptr = (int64_t*)idx_ptr; + indptr_len = num_idx_ptr; + } } }; @@ -1219,7 +1227,7 @@ extern "C" { outputs[i].setTensor(data2, (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, type); } - + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream); return fcomp(attrs, inputs, outputs, res); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 19c7ff695003..fec999d40282 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -130,6 +130,7 @@ void CustomFComputeDispatcher(const std::string op_name, // convert inputs/outpus NDArray to C types to be passed to lib_api.h for (size_t i = 0; i < inputs.size(); i++) { in_data.push_back(inputs[i].data().dptr_); + // To do: remove. ndims = 2. 3*5. in_shapes.push_back(inputs[i].shape().data()); in_dims.push_back(inputs[i].shape().ndim()); in_types.push_back(inputs[i].dtype()); @@ -151,6 +152,8 @@ void CustomFComputeDispatcher(const std::string op_name, } for (size_t i = 0; i < outputs.size(); i++) { + // To do: remove hardcode. + outputs[i].CheckAndAlloc({mshadow::Shape1(5 + 1), mshadow::Shape1(9)}); out_data.push_back(outputs[i].data().dptr_); out_shapes.push_back(outputs[i].shape().data()); out_dims.push_back(outputs[i].shape().ndim()); @@ -168,7 +171,7 @@ void CustomFComputeDispatcher(const std::string op_name, else if(outputs[i].storage_type() == mxnet::kCSRStorage) { out_indices.push_back(outputs[i].aux_data(csr::kIdx).dptr_); out_indptr.push_back(outputs[i].aux_data(csr::kIndPtr).dptr_); - out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); + out_indices_shapes.push_back(outputs[i].aux_shape(csr::kIdx).Size()); out_indptr_shapes.push_back(outputs[i].aux_shape(csr::kIndPtr).Size()); } } @@ -239,7 +242,7 @@ void CustomFComputeDispatcher(const std::string op_name, in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } - + if (state_ptr != nullptr) { // retrieve op state object created from CreateOpState CustomStatefulOpWrapper& op = state_ptr->get_state(); From 9a26ac301a54783d3318631bb36fc6d4d1c38a8a Mon Sep 17 00:00:00 2001 From: guanxinq Date: Mon, 2 Mar 2020 18:41:48 +0000 Subject: [PATCH 12/26] Fixed sparse output size(Refined) --- .../lib_custom_op/test_transsparse.py | 1 - .../lib_custom_op/transsparse_lib.cc | 9 +- include/mxnet/lib_api.h | 145 ++++++++++++------ src/c_api/c_api.cc | 78 ++++++---- 4 files changed, 152 insertions(+), 81 deletions(-) diff --git a/example/extensions/lib_custom_op/test_transsparse.py b/example/extensions/lib_custom_op/test_transsparse.py index e64ad2468483..139c61bea674 100644 --- a/example/extensions/lib_custom_op/test_transsparse.py +++ b/example/extensions/lib_custom_op/test_transsparse.py @@ -41,7 +41,6 @@ print(a.indices.asnumpy()) print(a.indptr.asnumpy()) -# To do: Fix segment fault. 
b = mx.nd.my_transsparse(a) print("B Type:", type(b)) print(b.data.asnumpy()) diff --git a/example/extensions/lib_custom_op/transsparse_lib.cc b/example/extensions/lib_custom_op/transsparse_lib.cc index 8ae78c2c32b1..29ef54f73885 100644 --- a/example/extensions/lib_custom_op/transsparse_lib.cc +++ b/example/extensions/lib_custom_op/transsparse_lib.cc @@ -52,14 +52,17 @@ void transpose(MXTensor src, MXTensor dst) { rowPtr[i] += rowPtr[i - 1]; } // Get the dst sparse matrix. + B->m_col_idx.resize(A->data_len); + B->m_row_ptr.resize(w + 1); + B->m_data.resize(A->data_len); for(int i = 0; i < h; i++) { for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { int index = rowPtr[A->indices[j] + 1]++; - Bval[index] = Aval[j]; - B->indices[index] = i; + B->m_data[index] = Aval[j]; + B->m_col_idx[index] = i; } } - memcpy(B->indptr, rowPtr.data(), sizeof(int64_t) * (w + 1)); + memcpy(B->m_row_ptr.data(), rowPtr.data(), sizeof(int64_t) * (w + 1)); } } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index e0241890addd..83423cc4edcb 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -241,7 +241,8 @@ enum MXReturnValue { MX_SUCCESS = 1, }; -struct MXSparse { +// For sparse input, read/write the data from NDarray via pointers. +struct MXInSparse { // Pointer to data. void *data{nullptr}; // length of (non-zero) data. @@ -273,6 +274,25 @@ struct MXSparse { } }; +// For sparse output, cannot read/write data from NDArray directly, since +// size is known during run time. Need a copy. +struct MXOutSparse { + // Data of sparse output. + std::vector &m_data; + + // To store aux data for sparse. + // For CSR, indices stores the col index of non-zero values. + // For row sparse, indices store row index of rows which have non-zero values. + std::vector &m_col_idx; + + // For CSR, indptr gives the start and end index of data for each row. + // For row sparse, indptr is empty. + std::vector &m_row_ptr; + + MXOutSparse(std::vector &data, std::vector &col_idx, std::vector &row_ptr) : + m_data(data), m_col_idx(col_idx), m_row_ptr(row_ptr) {} +}; + /*! 
* \brief Tensor data structure used by custom operator */ @@ -924,10 +944,11 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + void** in_indices, void** in_indptr, + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, + std::vector>& col_idx, + std::vector>& row_ptr); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, @@ -951,10 +972,11 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** out_indices, - void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + void** in_indices, void** in_indptr, + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, + std::vector>& col_idx, + std::vector>& row_ptr); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1160,9 +1182,11 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** in_indptr, + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, + std::vector>& col_idx, + std::vector>& row_ptr) { // create map of attributes from list std::map attrs; for (int i = 0; i < num; i++) { @@ -1171,63 +1195,51 @@ extern "C" { // create a vector of tensors for inputs std::vector inputs(num_in); + // create a vector for sparse inputs + std::vector in_sparse(num_in); - MXStorageType type; - void *data = nullptr; - void *data2 = nullptr; - MXSparse sparse; - MXSparse sparse2; for (int i = 0; i < num_in; i++) { // Dense representation. if(!in_indices_shapes) { - type = kDefaultStorage; - data = indata[i]; + inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // Sparse representation. else { - // To do: remove if else. 
+ MXStorageType type; if(!in_indptr_shapes) { type = kRowSparseStorage; - sparse.set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } else { type = kCSRStorage; - sparse.set(indata[i], inshapes[i], indims[i], in_indices[i], + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } - data = (void*)(&sparse); + inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], {indev_type[i], indev_id[i]}, type); } - inputs[i].setTensor(data, (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, type); } + // create a vector of tensors for outputs std::vector outputs(num_out); + // create a vector for sparse outputs + std::vector out_sparse; for (int i = 0; i < num_out; i++) { - //MXStorageType type2; - if(!out_indices_shapes) { - type = kDefaultStorage; - data2 = outdata[i]; + if(col_idx.empty()) { + outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { - // To do: remove if else. - if(!out_indptr_shapes) { - type = kRowSparseStorage; - sparse2.set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); - } - else { - type = kCSRStorage; - sparse2.set(outdata[i], outshapes[i], outdims[i], out_indices[i], - out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); - } - data2 = (void*)(&sparse2); - } - - outputs[i].setTensor(data2, (MXDType)outtypes[i], outshapes[i], outdims[i], + out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); + MXStorageType type = row_ptr.empty() ? kRowSparseStorage : kCSRStorage; + outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, type); + } } - + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream); return fcomp(attrs, inputs, outputs, res); @@ -1299,21 +1311,56 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, - int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + void** in_indices, void** in_indptr, + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, + std::vector>& col_idx, + std::vector>& row_ptr) { // create a vector of tensors for inputs std::vector inputs(num_in); + // create a vector for sparse inputs + std::vector in_sparse(num_in); + for (int i = 0; i < num_in; i++) { - inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], + // Dense representation. + if(!in_indices_shapes) { + inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); + } + // Sparse representation. 
+ else { + MXStorageType type; + if(!in_indptr_shapes) { + type = kRowSparseStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + } + else { + type = kCSRStorage; + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + } + inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], {indev_type[i], indev_id[i]}, type); + } } // create a vector of tensors for outputs std::vector outputs(num_out); + // create a vector for sparse outputs + std::vector out_sparse; + for (int i = 0; i < num_out; i++) { - outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], + if(col_idx.empty()) { + outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); + } + // Sparse representation. + else { + out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); + MXStorageType type = row_ptr.empty() ? kRowSparseStorage : kCSRStorage; + outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], {outdev_type[i], outdev_id[i]}, type); + } } OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index fec999d40282..4af6a067d6f5 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -121,16 +121,15 @@ void CustomFComputeDispatcher(const std::string op_name, std::vector in_dev_type, out_dev_type; std::vector in_dev_id, out_dev_id; - // Extra data for sparse representation. - std::vector in_indices, out_indices; - std::vector in_indptr, out_indptr; - std::vector in_indices_shapes, out_indices_shapes; - std::vector in_indptr_shapes, out_indptr_shapes; + // Extra data for sparse inputs. + std::vector in_indices; + std::vector in_indptr; + std::vector in_indices_shapes; + std::vector in_indptr_shapes; // convert inputs/outpus NDArray to C types to be passed to lib_api.h for (size_t i = 0; i < inputs.size(); i++) { in_data.push_back(inputs[i].data().dptr_); - // To do: remove. ndims = 2. 3*5. in_shapes.push_back(inputs[i].shape().data()); in_dims.push_back(inputs[i].shape().ndim()); in_types.push_back(inputs[i].dtype()); @@ -151,9 +150,12 @@ void CustomFComputeDispatcher(const std::string op_name, } } + // Extra data for sparse outputs. + // To do: fix data type. + std::vector> tmp_data; + std::vector> col_index, row_ptr; + for (size_t i = 0; i < outputs.size(); i++) { - // To do: remove hardcode. - outputs[i].CheckAndAlloc({mshadow::Shape1(5 + 1), mshadow::Shape1(9)}); out_data.push_back(outputs[i].data().dptr_); out_shapes.push_back(outputs[i].shape().data()); out_dims.push_back(outputs[i].shape().ndim()); @@ -163,17 +165,15 @@ void CustomFComputeDispatcher(const std::string op_name, out_dev_type.push_back(ctx_str); out_dev_id.push_back(outputs[i].ctx().real_dev_id()); - // To do: Find ways to handle out dimenson is unknown for sparse. 
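At this stage of the series a sparse output cannot be written into the NDArray directly, because its number of non-zero entries is only known after the custom compute has run; the result is therefore staged in std::vector buffers (tmp_data, col_index, row_ptr) and copied into the NDArray afterwards via CheckAndAlloc and memcpy. A minimal standalone sketch of that copy-back step, using the hypothetical names StagedCSR and copy_back_csr (neither appears in the patch itself):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Buffers the custom operator writes into while the output size is unknown.
    struct StagedCSR {
      std::vector<float>   data;     // non-zero values
      std::vector<int64_t> col_idx;  // CSR column indices
      std::vector<int64_t> row_ptr;  // CSR row pointers
    };

    // Once data.size() is known, the caller allocates the real output buffers
    // (CheckAndAlloc on the NDArray) and copies the staged result over.
    void copy_back_csr(const StagedCSR& s, float* out_data,
                       int64_t* out_idx, int64_t* out_indptr) {
      std::memcpy(out_data,   s.data.data(),    sizeof(float)   * s.data.size());
      std::memcpy(out_idx,    s.col_idx.data(), sizeof(int64_t) * s.col_idx.size());
      std::memcpy(out_indptr, s.row_ptr.data(), sizeof(int64_t) * s.row_ptr.size());
    }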
if(outputs[i].storage_type() == mxnet::kRowSparseStorage) { - out_indices.push_back(outputs[i].aux_data(rowsparse::kIdx).dptr_); - out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); + tmp_data.push_back(std::vector()); + col_index.push_back(std::vector()); } else if(outputs[i].storage_type() == mxnet::kCSRStorage) { - out_indices.push_back(outputs[i].aux_data(csr::kIdx).dptr_); - out_indptr.push_back(outputs[i].aux_data(csr::kIndPtr).dptr_); - out_indices_shapes.push_back(outputs[i].aux_shape(csr::kIdx).Size()); - out_indptr_shapes.push_back(outputs[i].aux_shape(csr::kIndPtr).Size()); - } + tmp_data.push_back(std::vector()); + col_index.push_back(std::vector()); + row_ptr.push_back(std::vector()); + } } // get memory resource and mxnet backend streams @@ -237,12 +237,11 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), - in_indptr_shapes.data(), out_indptr_shapes.data())) + in_indices.data(), in_indptr.data(), in_indices_shapes.data(), + in_indptr_shapes.data(), tmp_data, col_index, row_ptr)) << "Error calling FCompute for custom operator '" << op_name << "'"; } - + if (state_ptr != nullptr) { // retrieve op state object created from CreateOpState CustomStatefulOpWrapper& op = state_ptr->get_state(); @@ -259,11 +258,37 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), - in_indptr_shapes.data(), out_indptr_shapes.data())) + in_indices.data(), in_indptr.data(), + in_indices_shapes.data(), in_indptr_shapes.data(), + tmp_data, col_index, row_ptr)) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; } + + /* + std::cout << "Check Here:" << std::endl; + for(int i = 0; i < tmp_data[0].size(); i++) + std::cout << tmp_data[0][i] << " "; + std::cout << std::endl; + for(int i = 0; i < col_index[0].size(); i++) + std::cout << col_index[0][i] << " "; + std::cout << std::endl; + for(int i = 0; i < row_ptr[0].size(); i++) + std::cout << row_ptr[0][i] << " "; + std::cout << std::endl; + */ + // Alloc space for sparse output and copy data to saprse NDArray. + for (size_t i = 0; i < outputs.size(); i++) { + if (outputs[i].storage_type() == mxnet::kDefaultStorage) continue; + if (outputs[i].storage_type() == mxnet::kRowSparseStorage) { + outputs[i].CheckAndAlloc({mshadow::Shape1(col_index[i].size())}); + } + else if (outputs[i].storage_type() == mxnet::kCSRStorage) { + outputs[i].CheckAndAlloc({mshadow::Shape1(row_ptr[i].size()), mshadow::Shape1(col_index[i].size())}); + memcpy(outputs[i].aux_data(csr::kIndPtr).dptr_, row_ptr[i].data(), sizeof(int64_t) * row_ptr[i].size()); + } + memcpy(outputs[i].data().dptr_, tmp_data[i].data(), sizeof(float) * tmp_data[i].size()); + memcpy(outputs[i].aux_data(csr::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); + } } /*! 
@@ -610,11 +635,8 @@ int MXLoadLib(const char *path) { DispatchMode* dispatch_mode, std::vector* in_stypes, std::vector* out_stypes) { - // TODO(ziyimu): remove this dense enforce check after supporting sparse tensor - //CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) - //<< "Error input tensors are not dense for custom operator '" << name_str << "'"; - // set outputs as dense - return op::storage_type_assign(out_stypes, mxnet::kCSRStorage, + return op::storage_type_assign(out_stypes, + static_cast(in_stypes->at(0)), dispatch_mode, DispatchMode::kFComputeEx); }; From 041470b667cf6fc71e4ff9f5511cbac8ddd21cfe Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 3 Mar 2020 00:58:59 +0000 Subject: [PATCH 13/26] Add tests for symbolic and stateful ops --- .../lib_custom_op/test_transsparse.py | 44 +++++++++++++++---- .../lib_custom_op/transsparse_lib.cc | 12 ++--- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/example/extensions/lib_custom_op/test_transsparse.py b/example/extensions/lib_custom_op/test_transsparse.py index 139c61bea674..de5369e81168 100644 --- a/example/extensions/lib_custom_op/test_transsparse.py +++ b/example/extensions/lib_custom_op/test_transsparse.py @@ -36,13 +36,41 @@ a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) a = a.tostype('csr') -print(type(a)) -print(a.data.asnumpy()) -print(a.indices.asnumpy()) -print(a.indptr.asnumpy()) +print("--------Input CSR Array---------") +print("data:", a.data.asnumpy()) +print("indices:", a.indices.asnumpy()) +print("indptr:", a.indptr.asnumpy()) +print("--------Start NDArray Compute---------") b = mx.nd.my_transsparse(a) -print("B Type:", type(b)) -print(b.data.asnumpy()) -print(b.indices.asnumpy()) -print(b.indptr.asnumpy()) +print("Compute Results:") +print("data:", b.data.asnumpy()) +print("indices:", b.indices.asnumpy()) +print("indptr:", b.indptr.asnumpy()) + +print("Stateful Compute Result:") +c = mx.nd.state_transsparse(a, test_kw=100) +print("data:", c.data.asnumpy()) +print("indices:", c.indices.asnumpy()) +print("indptr:", c.indptr.asnumpy()) + +print("--------start symbolic compute--------") +d = mx.sym.Variable('d') +e = mx.sym.my_transsparse(d) +f = mx.sym.state_transsparse(d, test_kw=200) + +exe = e.bind(ctx=mx.cpu(),args={'d':a}) +exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) +out = exe.forward() +print("Compute Results:") +print("data:", out[0].data.asnumpy()) +print("indices:", out[0].indices.asnumpy()) +print("indptr:", out[0].indptr.asnumpy()) + +out2 = exe2.forward() +out2 = exe2.forward() +print("Stateful Compute Result:") +print("data:", out2[0].data.asnumpy()) +print("indices:", out2[0].indices.asnumpy()) +print("indptr:", out2[0].indptr.asnumpy()) + diff --git a/example/extensions/lib_custom_op/transsparse_lib.cc b/example/extensions/lib_custom_op/transsparse_lib.cc index 29ef54f73885..ce9919fb1eb8 100644 --- a/example/extensions/lib_custom_op/transsparse_lib.cc +++ b/example/extensions/lib_custom_op/transsparse_lib.cc @@ -27,30 +27,30 @@ #include "lib_api.h" void transpose(MXTensor src, MXTensor dst) { - typedef MXSparse DType; - DType* A = src.data(); - DType* B = dst.data(); - + MXInSparse* A = src.data(); + MXOutSparse* B = dst.data(); + std::vector shape = src.shape; int64_t h = shape[0]; int64_t w = shape[1]; - if(src.stype == kRowSparseStorage) { //To do: add implementation. } else if(src.stype == kCSRStorage) { // To do: fix type. 
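The "fix type" note refers to the float cast on the next line: this example only handles kFloat32, which is also all that its inferType accepts. One possible way to remove the hard-coded type, sketched with a hypothetical templated helper (transpose_csr_typed is not defined anywhere in this patch and only illustrates the dispatch shape):

    // Hypothetical sketch: dispatch on the tensor's dtype instead of assuming
    // float; transpose_csr_typed<T> would hold the loops below with T in
    // place of float.
    switch (src.dtype) {
      case kFloat32:
        transpose_csr_typed<float>(A, B, h, w);
        break;
      default:
        std::cout << "transpose: unsupported data type" << std::endl;
        break;
    }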
float *Aval = (float*) (A->data); - float *Bval = (float*) (B->data); std::vector rowPtr(w + 2, 0); + // count column for(int i = 0; i < A->data_len; i++) { rowPtr[A->indices[i] + 2]++; } + // Accumulated sum for(int i = 2; i < rowPtr.size(); i++) { rowPtr[i] += rowPtr[i - 1]; } + // Get the dst sparse matrix. B->m_col_idx.resize(A->data_len); B->m_row_ptr.resize(w + 1); From a3b175be95dda7a63aa02fc13741a8d0b1309b75 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 3 Mar 2020 21:05:07 +0000 Subject: [PATCH 14/26] Added a sample for row sparse transpose --- example/extensions/lib_custom_op/Makefile | 11 +- .../{test_transsparse.py => test_transcsr.py} | 12 +- .../lib_custom_op/test_transrowsp.py | 70 ++++++++ .../{transsparse_lib.cc => transcsr_lib.cc} | 17 +- .../lib_custom_op/transrowsp_lib.cc | 169 ++++++++++++++++++ include/mxnet/lib_api.h | 4 +- src/c_api/c_api.cc | 3 +- 7 files changed, 264 insertions(+), 22 deletions(-) rename example/extensions/lib_custom_op/{test_transsparse.py => test_transcsr.py} (89%) create mode 100644 example/extensions/lib_custom_op/test_transrowsp.py rename example/extensions/lib_custom_op/{transsparse_lib.cc => transcsr_lib.cc} (93%) create mode 100644 example/extensions/lib_custom_op/transrowsp_lib.cc diff --git a/example/extensions/lib_custom_op/Makefile b/example/extensions/lib_custom_op/Makefile index b4ceafc9fbdf..f1e0bc5eabfe 100644 --- a/example/extensions/lib_custom_op/Makefile +++ b/example/extensions/lib_custom_op/Makefile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -all: gemm_lib relu_lib transsparse_lib +all: gemm_lib relu_lib transcsr_lib transrowsp_lib gemm_lib: g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet @@ -23,8 +23,11 @@ gemm_lib: relu_lib: nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet -transsparse_lib: - g++ -shared -fPIC -std=c++11 transsparse_lib.cc -o libtranssparse_lib.so -I ../../../include/mxnet +transcsr_lib: + g++ -shared -fPIC -std=c++11 transcsr_lib.cc -o libtranscsr_lib.so -I ../../../include/mxnet + +transrowsp_lib: + g++ -shared -fPIC -std=c++11 transrowsp_lib.cc -o libtransrowsp_lib.so -I ../../../include/mxnet clean: - rm -rf libgemm_lib.so librelu_lib.so libtranssparse_lib.so + rm -rf libgemm_lib.so librelu_lib.so libtranscsr_lib.so libtransrowsp_lib.so diff --git a/example/extensions/lib_custom_op/test_transsparse.py b/example/extensions/lib_custom_op/test_transcsr.py similarity index 89% rename from example/extensions/lib_custom_op/test_transsparse.py rename to example/extensions/lib_custom_op/test_transcsr.py index de5369e81168..eb8028833974 100644 --- a/example/extensions/lib_custom_op/test_transsparse.py +++ b/example/extensions/lib_custom_op/test_transcsr.py @@ -28,10 +28,10 @@ #load library if (os.name=='posix'): - path = os.path.abspath('libtranssparse_lib.so') + path = os.path.abspath('libtranscsr_lib.so') mx.library.load(path) elif (os.name=='nt'): - path = os.path.abspath('libtranssparse_lib.dll') + path = os.path.abspath('libtranscsr_lib.dll') mx.library.load(path) a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) @@ -42,22 +42,22 @@ print("indptr:", a.indptr.asnumpy()) print("--------Start NDArray Compute---------") -b = mx.nd.my_transsparse(a) +b = mx.nd.my_transcsr(a) print("Compute Results:") print("data:", b.data.asnumpy()) print("indices:", b.indices.asnumpy()) print("indptr:", b.indptr.asnumpy()) print("Stateful Compute Result:") -c = 
mx.nd.state_transsparse(a, test_kw=100) +c = mx.nd.state_transcsr(a, test_kw=100) print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) print("indptr:", c.indptr.asnumpy()) print("--------start symbolic compute--------") d = mx.sym.Variable('d') -e = mx.sym.my_transsparse(d) -f = mx.sym.state_transsparse(d, test_kw=200) +e = mx.sym.my_transcsr(d) +f = mx.sym.state_transcsr(d, test_kw=200) exe = e.bind(ctx=mx.cpu(),args={'d':a}) exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) diff --git a/example/extensions/lib_custom_op/test_transrowsp.py b/example/extensions/lib_custom_op/test_transrowsp.py new file mode 100644 index 000000000000..a4d82e167232 --- /dev/null +++ b/example/extensions/lib_custom_op/test_transrowsp.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=arguments-differ + +# This test checks dynamic loading of custom library into MXNet +# and checks end to end compute of a simple 2D gemm custom op + +import mxnet as mx +import os + +#load library +if (os.name=='posix'): + path = os.path.abspath('libtransrowsp_lib.so') + mx.library.load(path) +elif (os.name=='nt'): + path = os.path.abspath('libtransrowsp_lib.dll') + mx.library.load(path) + +a = mx.nd.array([[1,2,3],[0,0,0],[4,0,5],[0,0,0],[0,0,0]]) +a = a.tostype('row_sparse') +print("--------Input CSR Array---------") +print("data:", a.data.asnumpy()) +print("indices:", a.indices.asnumpy()) + +print("--------Start NDArray Compute---------") +b = mx.nd.my_transrowsp(a) +print("Compute Results:") +print("data:", b.data.asnumpy()) +print("indices:", b.indices.asnumpy()) + +print("Stateful Compute Result:") +c = mx.nd.state_transrowsp(a, test_kw=100) +print("data:", c.data.asnumpy()) +print("indices:", c.indices.asnumpy()) + +print("--------start symbolic compute--------") +d = mx.sym.Variable('d') +e = mx.sym.my_transrowsp(d) +f = mx.sym.state_transrowsp(d, test_kw=200) + +exe = e.bind(ctx=mx.cpu(),args={'d':a}) +exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) +out = exe.forward() +print("Compute Results:") +print("data:", out[0].data.asnumpy()) +print("indices:", out[0].indices.asnumpy()) + +out2 = exe2.forward() +out2 = exe2.forward() +print("Stateful Compute Result:") +print("data:", out2[0].data.asnumpy()) +print("indices:", out2[0].indices.asnumpy()) diff --git a/example/extensions/lib_custom_op/transsparse_lib.cc b/example/extensions/lib_custom_op/transcsr_lib.cc similarity index 93% rename from example/extensions/lib_custom_op/transsparse_lib.cc rename to example/extensions/lib_custom_op/transcsr_lib.cc index ce9919fb1eb8..00873433223a 100644 --- a/example/extensions/lib_custom_op/transsparse_lib.cc +++ b/example/extensions/lib_custom_op/transcsr_lib.cc @@ -33,10 +33,7 @@ void transpose(MXTensor src, MXTensor 
dst) { std::vector shape = src.shape; int64_t h = shape[0]; int64_t w = shape[1]; - if(src.stype == kRowSparseStorage) { - //To do: add implementation. - } - else if(src.stype == kCSRStorage) { + if(src.stype == kCSRStorage) { // To do: fix type. float *Aval = (float*) (A->data); std::vector rowPtr(w + 2, 0); @@ -125,7 +122,7 @@ MXReturnValue inferShape(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(my_transsparse) +REGISTER_OP(my_transcsr) .setForward(forward, "cpu") .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) @@ -134,9 +131,9 @@ REGISTER_OP(my_transsparse) /* ------------------------------------------------------------------------- */ -class MyStatefulTransSparse : public CustomStatefulOp { +class MyStatefulTransCSR : public CustomStatefulOp { public: - explicit MyStatefulTransSparse(int count) : count(count) {} + explicit MyStatefulTransCSR(int count) : count(count) {} MXReturnValue Forward(std::vector inputs, std::vector outputs, @@ -153,7 +150,7 @@ class MyStatefulTransSparse : public CustomStatefulOp { return backward(attrs, inputs, outputs, op_res); } - ~MyStatefulTransSparse() {} + ~MyStatefulTransCSR() {} private: int count; @@ -164,7 +161,7 @@ MXReturnValue createOpState(std::map attrs, // testing passing of keyword arguments int count = attrs.count("test_kw") > 0 ? std::stoi(attrs["test_kw"]) : 0; // creating stateful operator instance - *op_inst = new MyStatefulTransSparse(count); + *op_inst = new MyStatefulTransCSR(count); std::cout << "Info: stateful operator created" << std::endl; return MX_SUCCESS; } @@ -175,7 +172,7 @@ MXReturnValue mutateInputs(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(state_transsparse) +REGISTER_OP(state_transcsr) .setParseAttrs(parseAttrs) .setInferType(inferType) .setInferShape(inferShape) diff --git a/example/extensions/lib_custom_op/transrowsp_lib.cc b/example/extensions/lib_custom_op/transrowsp_lib.cc new file mode 100644 index 000000000000..73eb38a6700e --- /dev/null +++ b/example/extensions/lib_custom_op/transrowsp_lib.cc @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2020 by Contributors + * \file transsparse_lib.cc + * \brief Sample 2D transpose custom operator. + */ + +#include +#include "lib_api.h" + +void transpose(MXTensor src, MXTensor dst) { + MXInSparse* A = src.data(); + MXOutSparse* B = dst.data(); + + std::vector shape = src.shape; + int64_t h = shape[0]; + int64_t w = shape[1]; + if(src.stype == kRowSparseStorage) { + //To do: add implementation. 
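For reference, the placeholder values that follow come straight from the input used in test_transrowsp.py; a later patch in this series ("Added real row sparse transpose") replaces this stub with a real implementation.

    // Worked example of the row_sparse layout assumed here (editorial note):
    //   dense input (5x3): [[1,2,3],[0,0,0],[4,0,5],[0,0,0],[0,0,0]]
    //   row_sparse input:  indices = {0,2}, data = {1,2,3, 4,0,5}  (2x3 block)
    //   its transpose is a 3x5 row_sparse array with every row non-zero:
    //   indices = {0,1,2}, data = {1,0,4,0,0, 2,0,0,0,0, 3,0,5,0,0}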
+ std::vector t_data{1,0,4,0,0,2,0,0,0,0,3,0,5,0,0}; + std::vector t_idx{0,1,2}; + B->m_data = t_data; + B->m_col_idx = t_idx; + } +} + +MXReturnValue forward(std::map attrs, + std::vector inputs, + std::vector outputs, + OpResource res) { + + // The data types and storage types of inputs and outputs should be the same. + if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) + return MX_FAIL; + + transpose(inputs[0], outputs[0]); + return MX_SUCCESS; +} + +MXReturnValue backward(std::map attrs, + std::vector inputs, + std::vector outputs, + OpResource res) { + return MX_SUCCESS; +} + +MXReturnValue parseAttrs(std::map attrs, int* num_in, int* num_out) { + *num_in = 1; + *num_out = 1; + return MX_SUCCESS; +} + +MXReturnValue inferType(std::map attrs, + std::vector &intypes, + std::vector &outtypes) { + // validate inputs + if (intypes.size() != 1) { + std::cout << "Expected 1 inputs to inferType" << std::endl; + return MX_FAIL; + } + for (unsigned i = 0; i < intypes.size(); i++) { + if (intypes[i] != kFloat32) { + std::cout << "Expected input " << i << " to have float32 type" << std::endl; + return MX_FAIL; + } + } + + outtypes[0] = intypes[0]; + return MX_SUCCESS; +} + +MXReturnValue inferShape(std::map attrs, + std::vector> &inshapes, + std::vector> &outshapes) { + // validate inputs + if (inshapes.size() != 1) { + std::cout << "Expected 1 inputs to inferShape" << std::endl; + return MX_FAIL; + } + + outshapes[0].push_back(inshapes[0][1]); + outshapes[0].push_back(inshapes[0][0]); + return MX_SUCCESS; +} + +REGISTER_OP(my_transrowsp) +.setForward(forward, "cpu") +.setBackward(backward, "cpu") +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape); + +/* ------------------------------------------------------------------------- */ + +class MyStatefulTransRowSP : public CustomStatefulOp { + public: + explicit MyStatefulTransRowSP(int count) : count(count) {} + + MXReturnValue Forward(std::vector inputs, + std::vector outputs, + OpResource op_res) { + std::cout << "Info: keyword + number of forward: " << ++count << std::endl; + std::map attrs; + return forward(attrs, inputs, outputs, op_res); + } + + MXReturnValue Backward(std::vector inputs, + std::vector outputs, + OpResource op_res) { + std::map attrs; + return backward(attrs, inputs, outputs, op_res); + } + + ~MyStatefulTransRowSP() {} + + private: + int count; +}; + +MXReturnValue createOpState(std::map attrs, + CustomStatefulOp** op_inst) { + // testing passing of keyword arguments + int count = attrs.count("test_kw") > 0 ? 
std::stoi(attrs["test_kw"]) : 0; + // creating stateful operator instance + *op_inst = new MyStatefulTransRowSP(count); + std::cout << "Info: stateful operator created" << std::endl; + return MX_SUCCESS; +} + +MXReturnValue mutateInputs(std::map attrs, + std::vector &input_indices) { + // input_indices.push_back(1); // mark mutate input + return MX_SUCCESS; +} + +REGISTER_OP(state_transrowsp) +.setParseAttrs(parseAttrs) +.setInferType(inferType) +.setInferShape(inferShape) +.setMutateInputs(mutateInputs) +.setCreateOpState(createOpState, "cpu"); + +MXReturnValue initialize(int version) { + if (version >= 10400) { + std::cout << "MXNet version " << version << " supported" << std::endl; + return MX_SUCCESS; + } else { + std::cout << "MXNet version " << version << " not supported" << std::endl; + return MX_FAIL; + } +} diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 83423cc4edcb..1d89f3666ff8 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -262,7 +262,9 @@ struct MXInSparse { void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { data = data_ptr; - data_len = num_idx; + // If CSR, num of non-zero value is num_idx, + // If row sparse, num of value is num_idx * width. + data_len = idx_ptr ? num_idx : num_idx * dims[1]; indices = (int64_t*)idx; indices_len = num_idx; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4af6a067d6f5..6713209444c4 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -281,13 +281,14 @@ void CustomFComputeDispatcher(const std::string op_name, if (outputs[i].storage_type() == mxnet::kDefaultStorage) continue; if (outputs[i].storage_type() == mxnet::kRowSparseStorage) { outputs[i].CheckAndAlloc({mshadow::Shape1(col_index[i].size())}); + memcpy(outputs[i].aux_data(rowsparse::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); } else if (outputs[i].storage_type() == mxnet::kCSRStorage) { outputs[i].CheckAndAlloc({mshadow::Shape1(row_ptr[i].size()), mshadow::Shape1(col_index[i].size())}); memcpy(outputs[i].aux_data(csr::kIndPtr).dptr_, row_ptr[i].data(), sizeof(int64_t) * row_ptr[i].size()); + memcpy(outputs[i].aux_data(csr::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); } memcpy(outputs[i].data().dptr_, tmp_data[i].data(), sizeof(float) * tmp_data[i].size()); - memcpy(outputs[i].aux_data(csr::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); } } From 99d00c2ab6255c22df4e487584874065ceede3b3 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 3 Mar 2020 22:15:49 +0000 Subject: [PATCH 15/26] Added real row sparse transpose --- .../lib_custom_op/test_transrowsp.py | 1 + .../lib_custom_op/transrowsp_lib.cc | 26 ++++++++-- include/mxnet/lib_api.h | 48 +++++++++---------- src/c_api/c_api.cc | 18 ++----- 4 files changed, 49 insertions(+), 44 deletions(-) diff --git a/example/extensions/lib_custom_op/test_transrowsp.py b/example/extensions/lib_custom_op/test_transrowsp.py index a4d82e167232..217efc016f66 100644 --- a/example/extensions/lib_custom_op/test_transrowsp.py +++ b/example/extensions/lib_custom_op/test_transrowsp.py @@ -35,6 +35,7 @@ mx.library.load(path) a = mx.nd.array([[1,2,3],[0,0,0],[4,0,5],[0,0,0],[0,0,0]]) +# a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) a = a.tostype('row_sparse') print("--------Input CSR Array---------") print("data:", a.data.asnumpy()) diff --git a/example/extensions/lib_custom_op/transrowsp_lib.cc 
b/example/extensions/lib_custom_op/transrowsp_lib.cc index 73eb38a6700e..6a354f93fa52 100644 --- a/example/extensions/lib_custom_op/transrowsp_lib.cc +++ b/example/extensions/lib_custom_op/transrowsp_lib.cc @@ -34,11 +34,27 @@ void transpose(MXTensor src, MXTensor dst) { int64_t h = shape[0]; int64_t w = shape[1]; if(src.stype == kRowSparseStorage) { - //To do: add implementation. - std::vector t_data{1,0,4,0,0,2,0,0,0,0,3,0,5,0,0}; - std::vector t_idx{0,1,2}; - B->m_data = t_data; - B->m_col_idx = t_idx; + std::map> mp; + float *Aval = (float*) (A->data); + for(int i = 0; i < A->data_len; i++) { + int row = i / w; + int col = i % w; + row = A->indices[row]; + if(Aval[i] != 0) { + if(mp.find(col) == mp.end()) { + mp[col] = std::vector(h, 0); + mp[col][row] = Aval[i]; + } + else { + mp[col][row] = Aval[i]; + } + } + } + + for(auto i : mp) { + B->m_col_idx.push_back(i.first); + B->m_data.insert(B->m_data.end(), i.second.begin(), i.second.end()); + } } } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 1d89f3666ff8..2e405924479c 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -947,10 +947,10 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, - std::vector>& col_idx, - std::vector>& row_ptr); + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, + std::vector>& col_idx, + std::vector>& row_ptr); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, @@ -976,7 +976,7 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, void** in_indices, void** in_indptr, int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, + std::vector>& tmp_data, std::vector>& col_idx, std::vector>& row_ptr); @@ -1185,8 +1185,8 @@ extern "C" { xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, + int64_t* in_indices_shapes, int64_t* in_indptr_shapes, + std::vector>& tmp_data, std::vector>& col_idx, std::vector>& row_ptr) { // create map of attributes from list @@ -1203,20 +1203,20 @@ extern "C" { for (int i = 0; i < num_in; i++) { // Dense representation. if(!in_indices_shapes) { - inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); + inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // Sparse representation. 
else { - MXStorageType type; - if(!in_indptr_shapes) { + MXStorageType type; + if(!in_indptr_shapes) { type = kRowSparseStorage; - in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); + in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } else { type = kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], - in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, type); @@ -1230,15 +1230,15 @@ extern "C" { for (int i = 0; i < num_out; i++) { if(col_idx.empty()) { - outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); + outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { - out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); - MXStorageType type = row_ptr.empty() ? kRowSparseStorage : kCSRStorage; - outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, type); + out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); + MXStorageType type = row_ptr.empty() ? kRowSparseStorage : kCSRStorage; + outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], {outdev_type[i], outdev_id[i]}, type); } } @@ -1315,7 +1315,7 @@ extern "C" { xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, void** in_indices, void** in_indptr, int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, + std::vector>& tmp_data, std::vector>& col_idx, std::vector>& row_ptr) { // create a vector of tensors for inputs @@ -1327,7 +1327,7 @@ extern "C" { // Dense representation. if(!in_indices_shapes) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); + inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // Sparse representation. else { @@ -1339,7 +1339,7 @@ extern "C" { else { type = kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], - in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); + in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, type); @@ -1354,14 +1354,14 @@ extern "C" { for (int i = 0; i < num_out; i++) { if(col_idx.empty()) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); MXStorageType type = row_ptr.empty() ? 
kRowSparseStorage : kCSRStorage; outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, type); + outIDs[i], {outdev_type[i], outdev_id[i]}, type); } } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 6713209444c4..a5fb090f0d32 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -238,7 +238,7 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, in_indices.data(), in_indptr.data(), in_indices_shapes.data(), - in_indptr_shapes.data(), tmp_data, col_index, row_ptr)) + in_indptr_shapes.data(), tmp_data, col_index, row_ptr)) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -260,22 +260,10 @@ void CustomFComputeDispatcher(const std::string op_name, cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, in_indices.data(), in_indptr.data(), in_indices_shapes.data(), in_indptr_shapes.data(), - tmp_data, col_index, row_ptr)) + tmp_data, col_index, row_ptr)) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; } - /* - std::cout << "Check Here:" << std::endl; - for(int i = 0; i < tmp_data[0].size(); i++) - std::cout << tmp_data[0][i] << " "; - std::cout << std::endl; - for(int i = 0; i < col_index[0].size(); i++) - std::cout << col_index[0][i] << " "; - std::cout << std::endl; - for(int i = 0; i < row_ptr[0].size(); i++) - std::cout << row_ptr[0][i] << " "; - std::cout << std::endl; - */ // Alloc space for sparse output and copy data to saprse NDArray. for (size_t i = 0; i < outputs.size(); i++) { if (outputs[i].storage_type() == mxnet::kDefaultStorage) continue; @@ -637,7 +625,7 @@ int MXLoadLib(const char *path) { std::vector* in_stypes, std::vector* out_stypes) { return op::storage_type_assign(out_stypes, - static_cast(in_stypes->at(0)), + static_cast(in_stypes->at(0)), dispatch_mode, DispatchMode::kFComputeEx); }; From 60e6753dd7f763afdee92f4e9d6eb738b819449f Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 10 Mar 2020 23:40:13 +0000 Subject: [PATCH 16/26] Fix output size issue by adding lambda for CheckAndAlloc() --- .../extensions/lib_custom_op/transcsr_lib.cc | 26 ++-- .../lib_custom_op/transrowsp_lib.cc | 17 ++- include/mxnet/lib_api.h | 144 ++++++++++-------- src/c_api/c_api.cc | 81 +++++----- 4 files changed, 147 insertions(+), 121 deletions(-) diff --git a/example/extensions/lib_custom_op/transcsr_lib.cc b/example/extensions/lib_custom_op/transcsr_lib.cc index 00873433223a..ab82523ab67e 100644 --- a/example/extensions/lib_custom_op/transcsr_lib.cc +++ b/example/extensions/lib_custom_op/transcsr_lib.cc @@ -26,40 +26,36 @@ #include #include "lib_api.h" -void transpose(MXTensor src, MXTensor dst) { - MXInSparse* A = src.data(); - MXOutSparse* B = dst.data(); - +void transpose(MXTensor src, MXTensor dst, OpResource res) { + MXSparse* A = src.data(); + MXSparse* B = dst.data(); std::vector shape = src.shape; int64_t h = shape[0]; int64_t w = shape[1]; if(src.stype == kCSRStorage) { - // To do: fix type. float *Aval = (float*) (A->data); std::vector rowPtr(w + 2, 0); - // count column for(int i = 0; i < A->data_len; i++) { rowPtr[A->indices[i] + 2]++; } - // Accumulated sum for(int i = 2; i < rowPtr.size(); i++) { rowPtr[i] += rowPtr[i - 1]; } - // Get the dst sparse matrix. 
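A worked example may make the counting and scatter steps easier to follow (editorial addition, using the CSR matrix from test_transcsr.py):

    // A (3x5, CSR): [[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]
    //   A->data    = {1,3,2,1, 1, 2,4,5,3}
    //   A->indices = {0,1,3,4, 1, 1,2,3,4}   (column of each value)
    //   A->indptr  = {0,4,5,9}
    // Counting the columns ({1,3,1,2,2}) and taking the running sum yields the
    // transpose's indptr {0,1,4,5,7,9} for the 5x3 result; the scatter loop
    // then places each value at index rowPtr[A->indices[j] + 1]++ and records
    // its source row i in B->indices.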
- B->m_col_idx.resize(A->data_len); - B->m_row_ptr.resize(w + 1); - B->m_data.resize(A->data_len); + // Alloc memory for sparse data, where 0 is the index + // of B in output vector. + res.alloc_ndarray(B, 0, A->data_len, w + 1); + float *Bval = (float*) (B->data); for(int i = 0; i < h; i++) { for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { int index = rowPtr[A->indices[j] + 1]++; - B->m_data[index] = Aval[j]; - B->m_col_idx[index] = i; + Bval[index] = Aval[j]; + B->indices[index] = i; } } - memcpy(B->m_row_ptr.data(), rowPtr.data(), sizeof(int64_t) * (w + 1)); + memcpy(B->indptr, rowPtr.data(), sizeof(int64_t) * (w + 1)); } } @@ -72,7 +68,7 @@ MXReturnValue forward(std::map attrs, if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) return MX_FAIL; - transpose(inputs[0], outputs[0]); + transpose(inputs[0], outputs[0], res); return MX_SUCCESS; } diff --git a/example/extensions/lib_custom_op/transrowsp_lib.cc b/example/extensions/lib_custom_op/transrowsp_lib.cc index 6a354f93fa52..42187be5ac48 100644 --- a/example/extensions/lib_custom_op/transrowsp_lib.cc +++ b/example/extensions/lib_custom_op/transrowsp_lib.cc @@ -26,9 +26,9 @@ #include #include "lib_api.h" -void transpose(MXTensor src, MXTensor dst) { - MXInSparse* A = src.data(); - MXOutSparse* B = dst.data(); +void transpose(MXTensor src, MXTensor dst, OpResource res) { + MXSparse* A = src.data(); + MXSparse* B = dst.data(); std::vector shape = src.shape; int64_t h = shape[0]; @@ -51,9 +51,14 @@ void transpose(MXTensor src, MXTensor dst) { } } + res.alloc_ndarray(B, 0, mp.size()); + float *Bval = (float*) (B->data); + int didx = 0, iidx = 0; for(auto i : mp) { - B->m_col_idx.push_back(i.first); - B->m_data.insert(B->m_data.end(), i.second.begin(), i.second.end()); + B->indices[iidx++] = i.first; + for(auto j : i.second) { + Bval[didx++] = j; + } } } } @@ -67,7 +72,7 @@ MXReturnValue forward(std::map attrs, if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) return MX_FAIL; - transpose(inputs[0], outputs[0]); + transpose(inputs[0], outputs[0], res); return MX_SUCCESS; } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 2e405924479c..c7d6aa7deffc 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -242,29 +242,33 @@ enum MXReturnValue { }; // For sparse input, read/write the data from NDarray via pointers. -struct MXInSparse { +struct MXSparse { // Pointer to data. void *data{nullptr}; // length of (non-zero) data. int64_t data_len; // To store aux data for sparse. - // For CSR, indices stores the col index of non-zero values. - // For row sparse, indices store row index of rows which have non-zero values. + // For CSR, indices stores the col index of non-zero elements. + // For row sparse, indices store row index of rows which have non-zero elements. int64_t* indices; int64_t indices_len; // For CSR, indptr gives the start and end index of data for each row. - // For row sparse, indptr is empty. - int64_t* indptr; + // For row sparse, indptr is not used. + int64_t* indptr = nullptr; int64_t indptr_len; void set(void *data_ptr, const int64_t* dims, int ndims, void *idx, int64_t num_idx, void *idx_ptr = nullptr, int64_t num_idx_ptr = 0) { data = data_ptr; - // If CSR, num of non-zero value is num_idx, - // If row sparse, num of value is num_idx * width. - data_len = idx_ptr ? num_idx : num_idx * dims[1]; + // If CSR, num of non-zero elemets is num_idx, + // If row sparse, num of elements is num_idx * width. 
+ data_len = num_idx; + if(!idx_ptr) { + for(int i = 1; i < ndims; ++i) + data_len *= dims[i]; + } indices = (int64_t*)idx; indices_len = num_idx; @@ -276,25 +280,6 @@ struct MXInSparse { } }; -// For sparse output, cannot read/write data from NDArray directly, since -// size is known during run time. Need a copy. -struct MXOutSparse { - // Data of sparse output. - std::vector &m_data; - - // To store aux data for sparse. - // For CSR, indices stores the col index of non-zero values. - // For row sparse, indices store row index of rows which have non-zero values. - std::vector &m_col_idx; - - // For CSR, indptr gives the start and end index of data for each row. - // For row sparse, indptr is empty. - std::vector &m_row_ptr; - - MXOutSparse(std::vector &data, std::vector &col_idx, std::vector &row_ptr) : - m_data(data), m_col_idx(col_idx), m_row_ptr(row_ptr) {} -}; - /*! * \brief Tensor data structure used by custom operator */ @@ -433,6 +418,8 @@ struct MXTensor { /*! \brief resource malloc function to allocate memory inside Forward/Backward functions */ typedef void* (*xpu_malloc_t)(void*, int); +typedef void (*ndarray_malloc_t)(void*, int, int, int, void**, int64_t**, int64_t**); + #if defined(__NVCC__) typedef cudaStream_t mx_stream_t; #else @@ -449,6 +436,13 @@ class OpResource { : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream) {} + OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, + xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream, + ndarray_malloc_t ndarray_malloc_fp, void* ndarray_alloc_fp) + : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), + cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream), + ndarray_malloc(ndarray_malloc_fp), ndarray_alloc(ndarray_alloc_fp) {} + /*! \brief allocate cpu memory controlled by MXNet */ void* alloc_cpu(int size) { return cpu_malloc(cpu_alloc, size); @@ -464,6 +458,11 @@ class OpResource { return static_cast(cuda_stream); } + void alloc_ndarray(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) { + ndarray_malloc(ndarray_alloc, index, indices_len, indptr_len, + &(sparse->data), &(sparse->indices), &(sparse->indptr)); + } + private: /*! \brief allocation lambda function */ xpu_malloc_t cpu_malloc, gpu_malloc; @@ -471,6 +470,9 @@ class OpResource { void *cpu_alloc, *gpu_alloc; /*! \brief cuda stream passed from MXNet */ void *cuda_stream; + + ndarray_malloc_t ndarray_malloc; + void *ndarray_alloc; }; /*! 
@@ -946,11 +948,11 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, - std::vector>& col_idx, - std::vector>& row_ptr); + ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_OPCALLMUTATEINPUTS_STR "_opCallMutateInputs" typedef int (*opCallMutateInputs_t)(mutateInputs_t mutate, const char* const* keys, @@ -974,12 +976,11 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, - std::vector>& col_idx, - std::vector>& row_ptr); - + ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); @@ -1184,11 +1185,10 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, - std::vector>& col_idx, - std::vector>& row_ptr) { + ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list std::map attrs; for (int i = 0; i < num; i++) { @@ -1198,7 +1198,7 @@ extern "C" { // create a vector of tensors for inputs std::vector inputs(num_in); // create a vector for sparse inputs - std::vector in_sparse(num_in); + std::vector in_sparse(num_in); for (int i = 0; i < num_in; i++) { // Dense representation. @@ -1225,25 +1225,32 @@ extern "C" { // create a vector of tensors for outputs std::vector outputs(num_out); - // create a vector for sparse outputs - std::vector out_sparse; + std::vector out_sparse(num_out); for (int i = 0; i < num_out; i++) { - if(col_idx.empty()) { + // Dense representation. + if(!out_indices_shapes) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); + outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { - out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); - MXStorageType type = row_ptr.empty() ? 
kRowSparseStorage : kCSRStorage; + MXStorageType type; + if(!out_indptr_shapes) { + type = kRowSparseStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); + } + else { + type = kCSRStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); + } outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, type); + outIDs[i], {outdev_type[i], outdev_id[i]}, type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream); - + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream, ndarray_malloc, ndarray_alloc); return fcomp(attrs, inputs, outputs, res); } @@ -1313,15 +1320,15 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - void** in_indices, void** in_indptr, - int64_t* in_indices_shapes, int64_t* in_indptr_shapes, - std::vector>& tmp_data, - std::vector>& col_idx, - std::vector>& row_ptr) { + ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { + // create a vector of tensors for inputs std::vector inputs(num_in); // create a vector for sparse inputs - std::vector in_sparse(num_in); + std::vector in_sparse(num_in); for (int i = 0; i < num_in; i++) { // Dense representation. @@ -1349,23 +1356,32 @@ extern "C" { // create a vector of tensors for outputs std::vector outputs(num_out); // create a vector for sparse outputs - std::vector out_sparse; + std::vector out_sparse(num_out); for (int i = 0; i < num_out; i++) { - if(col_idx.empty()) { + // Dense representation. + if(!out_indices_shapes) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { - out_sparse.push_back(MXOutSparse(tmp_data[0], col_idx[0], row_ptr[0])); - MXStorageType type = row_ptr.empty() ? kRowSparseStorage : kCSRStorage; + MXStorageType type; + if(!out_indptr_shapes) { + type = kRowSparseStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); + } + else { + type = kCSRStorage; + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); + } outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream); + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream, ndarray_malloc, ndarray_alloc); CustomStatefulOp* op_ptr = reinterpret_cast(state_op); if (is_forward) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index a5fb090f0d32..6d50e1e93818 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -121,11 +121,11 @@ void CustomFComputeDispatcher(const std::string op_name, std::vector in_dev_type, out_dev_type; std::vector in_dev_id, out_dev_id; - // Extra data for sparse inputs. 
- std::vector in_indices; - std::vector in_indptr; - std::vector in_indices_shapes; - std::vector in_indptr_shapes; + // Extra data for sparse inputs and outputs. + std::vector in_indices, out_indices; + std::vector in_indptr, out_indptr; + std::vector in_indices_shapes, out_indices_shapes; + std::vector in_indptr_shapes, out_indptr_shapes; // convert inputs/outpus NDArray to C types to be passed to lib_api.h for (size_t i = 0; i < inputs.size(); i++) { @@ -150,11 +150,6 @@ void CustomFComputeDispatcher(const std::string op_name, } } - // Extra data for sparse outputs. - // To do: fix data type. - std::vector> tmp_data; - std::vector> col_index, row_ptr; - for (size_t i = 0; i < outputs.size(); i++) { out_data.push_back(outputs[i].data().dptr_); out_shapes.push_back(outputs[i].shape().data()); @@ -166,14 +161,15 @@ void CustomFComputeDispatcher(const std::string op_name, out_dev_id.push_back(outputs[i].ctx().real_dev_id()); if(outputs[i].storage_type() == mxnet::kRowSparseStorage) { - tmp_data.push_back(std::vector()); - col_index.push_back(std::vector()); + out_indices.push_back(outputs[i].aux_data(rowsparse::kIdx).dptr_); + out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); } else if(outputs[i].storage_type() == mxnet::kCSRStorage) { - tmp_data.push_back(std::vector()); - col_index.push_back(std::vector()); - row_ptr.push_back(std::vector()); - } + out_indices.push_back(outputs[i].aux_data(csr::kIdx).dptr_); + out_indptr.push_back(outputs[i].aux_data(csr::kIndPtr).dptr_); + out_indices_shapes.push_back(outputs[i].aux_shape(csr::kIdx).Size()); + out_indptr_shapes.push_back(outputs[i].aux_shape(csr::kIndPtr).Size()); + } } // get memory resource and mxnet backend streams @@ -194,6 +190,24 @@ void CustomFComputeDispatcher(const std::string op_name, return workspace.dptr_; }; + // create lambda that allocates memory for sparse and updates MXSparse. 
+ auto ndarray_alloc = [&](int index, int indices_len, int idxptr_len, + void** data, int64_t** indices, int64_t** indptr) { + // Row Sparse + if(idxptr_len == 0) { + outputs[index].CheckAndAlloc({mshadow::Shape1(indices_len)}); + *data = outputs[index].data().dptr_; + *indices = (int64_t*)outputs[index].aux_data(rowsparse::kIdx).dptr_; + } + // CSR + else { + outputs[index].CheckAndAlloc({mshadow::Shape1(idxptr_len), mshadow::Shape1(indices_len)}); + *data = outputs[index].data().dptr_; + *indices = (int64_t*)outputs[index].aux_data(csr::kIdx).dptr_; + *indptr = (int64_t*)outputs[index].aux_data(csr::kIndPtr).dptr_; + } + }; + // create lambda without captures so that we can cast it to function pointer // lambda with captures cannot be cast to function pointer and pass to lib_api.h // this needs to be a lambda function so that we can do the decltype cast @@ -210,6 +224,13 @@ void CustomFComputeDispatcher(const std::string op_name, return static_cast((*gpualloc)(size)); }; + typedef decltype(ndarray_alloc) alloc_type_ndarray; + auto ndarray_malloc = [](void* _ndarray_alloc, int index, int indices_len, int idxptr_len, + void** data, int64_t** indices, int64_t** indptr) { + alloc_type_ndarray* ndarrayalloc = static_cast(_ndarray_alloc); + (*ndarrayalloc)(index, indices_len, idxptr_len, data, indices, indptr); + }; + // get actual cudaStream_t out of mxnet gpu stream and pass to lib_api.h void *cuda_stream = nullptr; #if MXNET_USE_CUDA @@ -237,8 +258,10 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), in_indptr.data(), in_indices_shapes.data(), - in_indptr_shapes.data(), tmp_data, col_index, row_ptr)) + ndarray_malloc, &ndarray_alloc, + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -258,26 +281,12 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - in_indices.data(), in_indptr.data(), - in_indices_shapes.data(), in_indptr_shapes.data(), - tmp_data, col_index, row_ptr)) + ndarray_malloc, &ndarray_alloc, + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; } - - // Alloc space for sparse output and copy data to saprse NDArray. 
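With the allocation lambda above exposed through OpResource, the copy-back block removed just below is no longer needed: a custom operator now asks for NDArray-backed storage itself once it knows how large its sparse output is. A usage sketch as seen from the library side, where nnz and num_out_rows stand for values the operator computes (see transcsr_lib.cc and transrowsp_lib.cc in this patch):

    // For a CSR output at position 0 of the outputs vector:
    //   MXSparse* B = dst.data<MXSparse>();
    //   res.alloc_ndarray(B, 0, nnz, num_out_rows + 1);  // indices_len, indptr_len
    //   // B->data, B->indices and B->indptr now point at memory sized via
    //   // CheckAndAlloc, so no memcpy back in c_api.cc is required.
    // For a row_sparse output the indptr_len argument is omitted and defaults
    // to 0, as in transrowsp_lib.cc.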
- for (size_t i = 0; i < outputs.size(); i++) { - if (outputs[i].storage_type() == mxnet::kDefaultStorage) continue; - if (outputs[i].storage_type() == mxnet::kRowSparseStorage) { - outputs[i].CheckAndAlloc({mshadow::Shape1(col_index[i].size())}); - memcpy(outputs[i].aux_data(rowsparse::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); - } - else if (outputs[i].storage_type() == mxnet::kCSRStorage) { - outputs[i].CheckAndAlloc({mshadow::Shape1(row_ptr[i].size()), mshadow::Shape1(col_index[i].size())}); - memcpy(outputs[i].aux_data(csr::kIndPtr).dptr_, row_ptr[i].data(), sizeof(int64_t) * row_ptr[i].size()); - memcpy(outputs[i].aux_data(csr::kIdx).dptr_, col_index[i].data(), sizeof(int64_t) * col_index[i].size()); - } - memcpy(outputs[i].data().dptr_, tmp_data[i].data(), sizeof(float) * tmp_data[i].size()); - } } /*! From 3e7f23cb37faa1dbca5b888893cf12d7bc87076d Mon Sep 17 00:00:00 2001 From: guanxinq Date: Wed, 11 Mar 2020 20:46:41 +0000 Subject: [PATCH 17/26] Fix mixed storage formats error --- include/mxnet/lib_api.h | 24 ++++++++++++++---------- src/c_api/c_api.cc | 41 +++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index c7d6aa7deffc..8dbaf74a072f 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -949,6 +949,7 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, @@ -977,6 +978,7 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, @@ -1186,7 +1188,8 @@ extern "C" { xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int* instypes, int* outstypes, void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list @@ -1202,14 +1205,14 @@ extern "C" { for (int i = 0; i < num_in; i++) { // Dense representation. - if(!in_indices_shapes) { + if(instypes[i] == 0) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // Sparse representation. else { MXStorageType type; - if(!in_indptr_shapes) { + if(instypes[i] == 1) { type = kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } @@ -1229,14 +1232,14 @@ extern "C" { for (int i = 0; i < num_out; i++) { // Dense representation. - if(!out_indices_shapes) { + if(outstypes[i] == 0) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. 
else { MXStorageType type; - if(!out_indptr_shapes) { + if(outstypes[i] == 1) { type = kRowSparseStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } @@ -1321,7 +1324,8 @@ extern "C" { xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, - void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, + int* instypes, int* outstypes, void** in_indices, void** out_indices, + void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { @@ -1332,14 +1336,14 @@ extern "C" { for (int i = 0; i < num_in; i++) { // Dense representation. - if(!in_indices_shapes) { + if(instypes[i] == 0) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); } // Sparse representation. else { MXStorageType type; - if(!in_indptr_shapes) { + if(instypes[i] == 1) { type = kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } @@ -1360,14 +1364,14 @@ extern "C" { for (int i = 0; i < num_out; i++) { // Dense representation. - if(!out_indices_shapes) { + if(outstypes[i] == 0) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); } // Sparse representation. else { MXStorageType type; - if(!out_indptr_shapes) { + if(outstypes[i] == 1) { type = kRowSparseStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 6d50e1e93818..4abae6d9544f 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -122,10 +122,11 @@ void CustomFComputeDispatcher(const std::string op_name, std::vector in_dev_id, out_dev_id; // Extra data for sparse inputs and outputs. 
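  // Each of these vectors is pre-sized to inputs.size()/outputs.size() and indexed by tensor
  // position, so dense entries keep their defaults (0 / nullptr) and sparse entries stay
  // aligned with the storage-type flags passed alongside them.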
- std::vector in_indices, out_indices; - std::vector in_indptr, out_indptr; - std::vector in_indices_shapes, out_indices_shapes; - std::vector in_indptr_shapes, out_indptr_shapes; + std::vector in_stypes(inputs.size(), 0), out_stypes(outputs.size(), 0); + std::vector in_indices(inputs.size(), nullptr), out_indices(outputs.size(), nullptr); + std::vector in_indptr(inputs.size(), nullptr), out_indptr(outputs.size(), nullptr); + std::vector in_indices_shapes(inputs.size(), 0), out_indices_shapes(outputs.size(), 0); + std::vector in_indptr_shapes(inputs.size(), 0), out_indptr_shapes(outputs.size(), 0); // convert inputs/outpus NDArray to C types to be passed to lib_api.h for (size_t i = 0; i < inputs.size(); i++) { @@ -139,14 +140,16 @@ void CustomFComputeDispatcher(const std::string op_name, in_dev_id.push_back(inputs[i].ctx().real_dev_id()); if(inputs[i].storage_type() == mxnet::kRowSparseStorage) { - in_indices.push_back(inputs[i].aux_data(rowsparse::kIdx).dptr_); - in_indices_shapes.push_back(inputs[i].aux_shape(rowsparse::kIdx).Size()); + in_stypes[i] = 1; + in_indices[i] = inputs[i].aux_data(rowsparse::kIdx).dptr_; + in_indices_shapes[i] = inputs[i].aux_shape(rowsparse::kIdx).Size(); } else if(inputs[i].storage_type() == mxnet::kCSRStorage) { - in_indices.push_back(inputs[i].aux_data(csr::kIdx).dptr_); - in_indptr.push_back(inputs[i].aux_data(csr::kIndPtr).dptr_); - in_indices_shapes.push_back(inputs[i].aux_shape(csr::kIdx).Size()); - in_indptr_shapes.push_back(inputs[i].aux_shape(csr::kIndPtr).Size()); + in_stypes[i] = 2; + in_indices[i] = inputs[i].aux_data(csr::kIdx).dptr_; + in_indptr[i] = inputs[i].aux_data(csr::kIndPtr).dptr_; + in_indices_shapes[i] = inputs[i].aux_shape(csr::kIdx).Size(); + in_indptr_shapes[i] = inputs[i].aux_shape(csr::kIndPtr).Size(); } } @@ -161,14 +164,16 @@ void CustomFComputeDispatcher(const std::string op_name, out_dev_id.push_back(outputs[i].ctx().real_dev_id()); if(outputs[i].storage_type() == mxnet::kRowSparseStorage) { - out_indices.push_back(outputs[i].aux_data(rowsparse::kIdx).dptr_); - out_indices_shapes.push_back(outputs[i].aux_shape(rowsparse::kIdx).Size()); + out_stypes[i] = 1; + out_indices[i] = outputs[i].aux_data(rowsparse::kIdx).dptr_; + out_indices_shapes[i] = outputs[i].aux_shape(rowsparse::kIdx).Size(); } else if(outputs[i].storage_type() == mxnet::kCSRStorage) { - out_indices.push_back(outputs[i].aux_data(csr::kIdx).dptr_); - out_indptr.push_back(outputs[i].aux_data(csr::kIndPtr).dptr_); - out_indices_shapes.push_back(outputs[i].aux_shape(csr::kIdx).Size()); - out_indptr_shapes.push_back(outputs[i].aux_shape(csr::kIndPtr).Size()); + out_stypes[i] = 2; + out_indices[i] = outputs[i].aux_data(csr::kIdx).dptr_; + out_indptr[i] = outputs[i].aux_data(csr::kIndPtr).dptr_; + out_indices_shapes[i] = outputs[i].aux_shape(csr::kIdx).Size(); + out_indptr_shapes[i] = outputs[i].aux_shape(csr::kIndPtr).Size(); } } @@ -258,7 +263,7 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - ndarray_malloc, &ndarray_alloc, + ndarray_malloc, &ndarray_alloc, in_stypes.data(), out_stypes.data(), in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) @@ -281,7 +286,7 @@ void CustomFComputeDispatcher(const std::string op_name, 
out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - ndarray_malloc, &ndarray_alloc, + ndarray_malloc, &ndarray_alloc, in_stypes.data(), out_stypes.data(), in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) From b97bfadad70e03ee92e85e8f81a96fc9dae352cb Mon Sep 17 00:00:00 2001 From: guanxinq Date: Thu, 12 Mar 2020 21:18:29 +0000 Subject: [PATCH 18/26] Added infer storage type function --- example/extensions/lib_custom_op/gemm_lib.cc | 18 ++++++ example/extensions/lib_custom_op/relu_lib.cu | 9 +++ .../extensions/lib_custom_op/transcsr_lib.cc | 9 +++ .../lib_custom_op/transrowsp_lib.cc | 9 +++ include/mxnet/lib_api.h | 56 ++++++++++++++++++- src/c_api/c_api.cc | 35 ++++++++++-- 6 files changed, 129 insertions(+), 7 deletions(-) diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index daeac337f4d6..c34c0ac84df8 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -140,6 +140,22 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } +MXReturnValue inferSType(std::map attrs, + std::vector &instypes, + std::vector &outstypes) { + // validate inputs + for (unsigned i = 0; i < instypes.size(); i++) { + if (instypes[i] != kDefaultStorage) { + std::cout << "Expected input " << i << " to have dense storage type" << std::endl; + return MX_FAIL; + } + } + for (unsigned i = 0; i < instypes.size(); i++) { + outstypes[i] = instypes[0]; + } + return MX_SUCCESS; +} + MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -171,6 +187,7 @@ REGISTER_OP(my_gemm) .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ @@ -219,6 +236,7 @@ MXReturnValue mutateInputs(std::map attrs, REGISTER_OP(state_gemm) .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape) .setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index 3beb68c20fa7..ee57ce38a429 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -109,6 +109,13 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } +MXReturnValue inferSType(std::map attrs, + std::vector &instypes, + std::vector &outstypes) { + outstypes[0] = instypes[0]; + return MX_SUCCESS; +} + MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -119,6 +126,7 @@ MXReturnValue inferShape(std::map attrs, REGISTER_OP(my_relu) .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape) .setForward(forwardCPU, "cpu") .setForward(forwardGPU, "gpu") @@ -176,6 +184,7 @@ MXReturnValue createOpStateGPU(std::map attrs, REGISTER_OP(my_state_relu) .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape) .setCreateOpState(createOpStateCPU, "cpu") .setCreateOpState(createOpStateGPU, "gpu"); diff --git a/example/extensions/lib_custom_op/transcsr_lib.cc b/example/extensions/lib_custom_op/transcsr_lib.cc 
index ab82523ab67e..eec2dfffa9d4 100644 --- a/example/extensions/lib_custom_op/transcsr_lib.cc +++ b/example/extensions/lib_custom_op/transcsr_lib.cc @@ -104,6 +104,13 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } +MXReturnValue inferSType(std::map attrs, + std::vector &instypes, + std::vector &outstypes) { + outstypes[0] = instypes[0]; + return MX_SUCCESS; +} + MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -123,6 +130,7 @@ REGISTER_OP(my_transcsr) .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ @@ -171,6 +179,7 @@ MXReturnValue mutateInputs(std::map attrs, REGISTER_OP(state_transcsr) .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape) .setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); diff --git a/example/extensions/lib_custom_op/transrowsp_lib.cc b/example/extensions/lib_custom_op/transrowsp_lib.cc index 42187be5ac48..14b8702092b1 100644 --- a/example/extensions/lib_custom_op/transrowsp_lib.cc +++ b/example/extensions/lib_custom_op/transrowsp_lib.cc @@ -108,6 +108,13 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } +MXReturnValue inferSType(std::map attrs, + std::vector &instypes, + std::vector &outstypes) { + outstypes[0] = instypes[0]; + return MX_SUCCESS; +} + MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -127,6 +134,7 @@ REGISTER_OP(my_transrowsp) .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ @@ -175,6 +183,7 @@ MXReturnValue mutateInputs(std::map attrs, REGISTER_OP(state_transrowsp) .setParseAttrs(parseAttrs) .setInferType(inferType) +.setInferSType(inferSType) .setInferShape(inferShape) .setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 8dbaf74a072f..55c09ed92568 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -687,6 +687,8 @@ typedef MXReturnValue (*parseAttrs_t)(std::map, int*, int*); typedef MXReturnValue (*inferType_t)(std::map, std::vector&, std::vector&); +typedef MXReturnValue (*inferSType_t)(std::map, + std::vector&, std::vector&); typedef MXReturnValue (*inferShape_t)(std::map, std::vector >&, std::vector >&); @@ -701,7 +703,7 @@ typedef MXReturnValue (*createOpState_t)(std::map, class CustomOp { public: explicit CustomOp(const char* op_name) : name(op_name), - parse_attrs(NULL), infer_type(NULL), infer_shape(NULL), mutate_inputs(NULL), isSGop(false) {} + parse_attrs(NULL), infer_type(NULL), infer_storage_type(NULL), infer_shape(NULL), mutate_inputs(NULL), isSGop(false) {} CustomOp& setForward(fcomp_t fcomp, const char* ctx) { if (forward_ctx_map.count(ctx) > 0) raiseDuplicateContextError(); @@ -722,6 +724,10 @@ class CustomOp { infer_type = func; return *this; } + CustomOp& setInferSType(inferSType_t func) { + infer_storage_type = func; + return *this; + } CustomOp& setInferShape(inferShape_t func) { infer_shape = func; return *this; @@ -762,6 +768,7 @@ class CustomOp { /*! 
\brief operator functions */ parseAttrs_t parse_attrs; inferType_t infer_type; + inferSType_t infer_storage_type; inferShape_t infer_shape; mutateInputs_t mutate_inputs; bool isSGop; @@ -913,7 +920,7 @@ typedef int (*opRegGet_t)(int idx, const char** name, int *isSGop, const char*** backward_ctx, fcomp_t** backward_fp, int* backward_count, const char*** create_op_ctx, createOpState_t** create_op_fp, int* create_op_count, - parseAttrs_t* parse, inferType_t* type, + parseAttrs_t* parse, inferType_t* type, inferSType_t* stype, inferShape_t* shape, mutateInputs_t* mutate); #define MXLIB_OPCALLFREE_STR "_opCallFree" @@ -935,6 +942,11 @@ typedef int (*opCallInferType_t)(inferType_t inferType, const char* const* keys, const char* const* vals, int num, int* intypes, int num_in, int* outtypes, int num_out); +#define MXLIB_OPCALLINFERSTYPE_STR "_opCallInferSType" +typedef int (*opCallInferSType_t)(inferSType_t inferSType, const char* const* keys, + const char* const* vals, int num, + int* intypes, int num_in, int* outtypes, int num_out); + #define MXLIB_OPCALLFCOMP_STR "_opCallFCompute" typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, const char* const* vals, int num, @@ -1042,12 +1054,13 @@ extern "C" { const char*** forward_ctx, fcomp_t** forward_fp, int* forward_count, const char*** backward_ctx, fcomp_t** backward_fp, int* backward_count, const char*** create_op_ctx, createOpState_t** create_op_fp, int* create_op_count, - parseAttrs_t* parse, inferType_t* type, + parseAttrs_t* parse, inferType_t* type, inferSType_t* stype, inferShape_t* shape, mutateInputs_t* mutate) { CustomOp &op = Registry::get()->get(idx); *name = op.name; *parse = op.parse_attrs; *type = op.infer_type; + *stype = op.infer_storage_type; *shape = op.infer_shape; *mutate = op.mutate_inputs; *isSGop = op.isSGop; @@ -1174,6 +1187,43 @@ extern "C" { return retval; } + /*! \brief returns status of calling inferSType function for operator from library */ +#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) + __declspec(dllexport) int __cdecl +#else + int +#endif + _opCallInferSType(inferSType_t inferSType, const char* const* keys, + const char* const* vals, int num, + int* instypes, int num_in, int* outstypes, int num_out) { + // create map of attributes from list + std::map attrs; + for (int i = 0; i < num; i++) { + attrs[std::string(keys[i])] = std::string(vals[i]); + } + + // create a vector of types for inputs + std::vector in_stypes(num_in); + for (int i = 0; i < num_in; i++) { + in_stypes[i] = instypes[i]; + } + + // create a vector of types for outputs + std::vector out_stypes(num_out, -1); + + int retval = inferSType(attrs, in_stypes, out_stypes); + + if (!retval) + return retval; + + // copy output types + for (int i = 0; i < num_out; i++) { + outstypes[i] = out_stypes[i]; + } + + return retval; + } + /*! 
\brief returns status of calling Forward/Backward function for operator from library */ #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) __declspec(dllexport) int __cdecl diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4abae6d9544f..25e83c0a8707 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -328,6 +328,9 @@ int MXLoadLib(const char *path) { opCallInferType_t callInferType = get_func(lib, const_cast(MXLIB_OPCALLINFERTYPE_STR)); + opCallInferSType_t callInferSType = + get_func(lib, const_cast(MXLIB_OPCALLINFERSTYPE_STR)); + opCallFComp_t callFComp = get_func(lib, const_cast(MXLIB_OPCALLFCOMP_STR)); @@ -362,6 +365,7 @@ int MXLoadLib(const char *path) { // function pointers holding implementation from custom library parseAttrs_t parse_fp = nullptr; inferType_t type_fp = nullptr; + inferSType_t stype_fp = nullptr; inferShape_t shape_fp = nullptr; // optional attributes mutateInputs_t mutate_fp = nullptr; @@ -378,7 +382,7 @@ int MXLoadLib(const char *path) { &forward_ctx, &forward_fcomp, &forward_count, &backward_ctx, &backward_fcomp, &backward_count, &createop_ctx, &createop_fp, &createop_count, - &parse_fp, &type_fp, &shape_fp, &mutate_fp); + &parse_fp, &type_fp, &stype_fp, &shape_fp, &mutate_fp); // construct maps of context to forward/backward custom library function std::unordered_map forward_ctx_map; @@ -408,6 +412,8 @@ int MXLoadLib(const char *path) { << "' custom op, Forward or CreateOpState function was not set."; CHECK(type_fp != nullptr) << "Error loading '" << name << "' custom op, InferType function was not set."; + CHECK(stype_fp != nullptr) << "Error loading '" << name + << "' custom op, InferSType function was not set."; CHECK(shape_fp != nullptr) << "Error loading '" << name << "' custom op, InferShape function was not set."; } else { @@ -638,9 +644,30 @@ int MXLoadLib(const char *path) { DispatchMode* dispatch_mode, std::vector* in_stypes, std::vector* out_stypes) { - return op::storage_type_assign(out_stypes, - static_cast(in_stypes->at(0)), - dispatch_mode, DispatchMode::kFComputeEx); + // convert attributes to vector of char* + std::vector attr_keys, attr_vals; + for (auto kv : attrs.dict) { + attr_keys.push_back(kv.first.c_str()); + attr_vals.push_back(kv.second.c_str()); + } + // copy input types from in_stype + std::vector instypes(*in_stypes); + + // output types will be populated by inferType function + std::vector outstypes(out_stypes->size()); + + CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + instypes.data(), in_stypes->size(), + outstypes.data(), out_stypes->size())) + << "Error calling InferSType for custom operator '" << name_str << "'"; + + // copy and assign output storage types from custom op to MXNet memory. 
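      // After the per-output assignment below, the op is dispatched through kFComputeEx,
      // the storage-aware compute path that can receive sparse NDArrays.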
+ for (size_t i = 0; i < out_stypes->size(); i++) { + STORAGE_TYPE_ASSIGN_CHECK(*out_stypes, i, outstypes[i]); + } + // assign dispatch mode + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + return true; }; // FGradient register lambda From 41f07849899610e15e625d34e8d44f6a701b06d8 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Fri, 13 Mar 2020 23:38:31 +0000 Subject: [PATCH 19/26] resolve comments --- example/extensions/lib_custom_op/Makefile | 12 +++---- ...{test_transcsr.py => test_transposecsr.py} | 12 +++---- ...t_transrowsp.py => test_transposerowsp.py} | 12 +++---- .../{transcsr_lib.cc => transposecsr_lib.cc} | 30 +++++++--------- ...ransrowsp_lib.cc => transposerowsp_lib.cc} | 33 ++++++++--------- include/mxnet/lib_api.h | 35 ++++++++----------- src/c_api/c_api.cc | 19 +++++----- 7 files changed, 70 insertions(+), 83 deletions(-) rename example/extensions/lib_custom_op/{test_transcsr.py => test_transposecsr.py} (89%) rename example/extensions/lib_custom_op/{test_transrowsp.py => test_transposerowsp.py} (88%) rename example/extensions/lib_custom_op/{transcsr_lib.cc => transposecsr_lib.cc} (89%) rename example/extensions/lib_custom_op/{transrowsp_lib.cc => transposerowsp_lib.cc} (89%) diff --git a/example/extensions/lib_custom_op/Makefile b/example/extensions/lib_custom_op/Makefile index f1e0bc5eabfe..feded2947ca3 100644 --- a/example/extensions/lib_custom_op/Makefile +++ b/example/extensions/lib_custom_op/Makefile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -all: gemm_lib relu_lib transcsr_lib transrowsp_lib +all: gemm_lib relu_lib transposecsr_lib transposerowsp_lib gemm_lib: g++ -shared -fPIC -std=c++11 gemm_lib.cc -o libgemm_lib.so -I ../../../include/mxnet @@ -23,11 +23,11 @@ gemm_lib: relu_lib: nvcc -shared -std=c++11 -Xcompiler -fPIC relu_lib.cu -o librelu_lib.so -I ../../../include/mxnet -transcsr_lib: - g++ -shared -fPIC -std=c++11 transcsr_lib.cc -o libtranscsr_lib.so -I ../../../include/mxnet +transposecsr_lib: + g++ -shared -fPIC -std=c++11 transposecsr_lib.cc -o libtransposecsr_lib.so -I ../../../include/mxnet -transrowsp_lib: - g++ -shared -fPIC -std=c++11 transrowsp_lib.cc -o libtransrowsp_lib.so -I ../../../include/mxnet +transposerowsp_lib: + g++ -shared -fPIC -std=c++11 transposerowsp_lib.cc -o libtransposerowsp_lib.so -I ../../../include/mxnet clean: - rm -rf libgemm_lib.so librelu_lib.so libtranscsr_lib.so libtransrowsp_lib.so + rm -rf libgemm_lib.so librelu_lib.so libtransposecsr_lib.so libtransposerowsp_lib.so diff --git a/example/extensions/lib_custom_op/test_transcsr.py b/example/extensions/lib_custom_op/test_transposecsr.py similarity index 89% rename from example/extensions/lib_custom_op/test_transcsr.py rename to example/extensions/lib_custom_op/test_transposecsr.py index eb8028833974..be2c470f90aa 100644 --- a/example/extensions/lib_custom_op/test_transcsr.py +++ b/example/extensions/lib_custom_op/test_transposecsr.py @@ -28,10 +28,10 @@ #load library if (os.name=='posix'): - path = os.path.abspath('libtranscsr_lib.so') + path = os.path.abspath('libtransposecsr_lib.so') mx.library.load(path) elif (os.name=='nt'): - path = os.path.abspath('libtranscsr_lib.dll') + path = os.path.abspath('libtransposecsr_lib.dll') mx.library.load(path) a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) @@ -42,22 +42,22 @@ print("indptr:", a.indptr.asnumpy()) print("--------Start NDArray Compute---------") -b = mx.nd.my_transcsr(a) +b = mx.nd.my_transposecsr(a) print("Compute Results:") 
print("data:", b.data.asnumpy()) print("indices:", b.indices.asnumpy()) print("indptr:", b.indptr.asnumpy()) print("Stateful Compute Result:") -c = mx.nd.state_transcsr(a, test_kw=100) +c = mx.nd.state_transposecsr(a, test_kw=100) print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) print("indptr:", c.indptr.asnumpy()) print("--------start symbolic compute--------") d = mx.sym.Variable('d') -e = mx.sym.my_transcsr(d) -f = mx.sym.state_transcsr(d, test_kw=200) +e = mx.sym.my_transposecsr(d) +f = mx.sym.state_transposecsr(d, test_kw=200) exe = e.bind(ctx=mx.cpu(),args={'d':a}) exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) diff --git a/example/extensions/lib_custom_op/test_transrowsp.py b/example/extensions/lib_custom_op/test_transposerowsp.py similarity index 88% rename from example/extensions/lib_custom_op/test_transrowsp.py rename to example/extensions/lib_custom_op/test_transposerowsp.py index 217efc016f66..8c00095a3589 100644 --- a/example/extensions/lib_custom_op/test_transrowsp.py +++ b/example/extensions/lib_custom_op/test_transposerowsp.py @@ -28,10 +28,10 @@ #load library if (os.name=='posix'): - path = os.path.abspath('libtransrowsp_lib.so') + path = os.path.abspath('libtransposerowsp_lib.so') mx.library.load(path) elif (os.name=='nt'): - path = os.path.abspath('libtransrowsp_lib.dll') + path = os.path.abspath('libtransposerowsp_lib.dll') mx.library.load(path) a = mx.nd.array([[1,2,3],[0,0,0],[4,0,5],[0,0,0],[0,0,0]]) @@ -42,20 +42,20 @@ print("indices:", a.indices.asnumpy()) print("--------Start NDArray Compute---------") -b = mx.nd.my_transrowsp(a) +b = mx.nd.my_transposerowsp(a) print("Compute Results:") print("data:", b.data.asnumpy()) print("indices:", b.indices.asnumpy()) print("Stateful Compute Result:") -c = mx.nd.state_transrowsp(a, test_kw=100) +c = mx.nd.state_transposerowsp(a, test_kw=100) print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) print("--------start symbolic compute--------") d = mx.sym.Variable('d') -e = mx.sym.my_transrowsp(d) -f = mx.sym.state_transrowsp(d, test_kw=200) +e = mx.sym.my_transposerowsp(d) +f = mx.sym.state_transposerowsp(d, test_kw=200) exe = e.bind(ctx=mx.cpu(),args={'d':a}) exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) diff --git a/example/extensions/lib_custom_op/transcsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc similarity index 89% rename from example/extensions/lib_custom_op/transcsr_lib.cc rename to example/extensions/lib_custom_op/transposecsr_lib.cc index eec2dfffa9d4..eb1cec3a9eff 100644 --- a/example/extensions/lib_custom_op/transcsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -46,7 +46,7 @@ void transpose(MXTensor src, MXTensor dst, OpResource res) { // Alloc memory for sparse data, where 0 is the index // of B in output vector. - res.alloc_ndarray(B, 0, A->data_len, w + 1); + res.alloc_sparse(B, 0, A->data_len, w + 1); float *Bval = (float*) (B->data); for(int i = 0; i < h; i++) { for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { @@ -63,10 +63,15 @@ MXReturnValue forward(std::map attrs, std::vector inputs, std::vector outputs, OpResource res) { - // The data types and storage types of inputs and outputs should be the same. - if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) + if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) { + std::cout << "Error! Expected all inputs and outputs to be the same type." 
+ << "Found input storage type:" << inputs[0].stype + << " Found output storage type:" << outputs[0].stype + << " Found input data type:" << inputs[0].dtype + << " Found output data type:" << outputs[0].dtype << std::endl; return MX_FAIL; + } transpose(inputs[0], outputs[0], res); return MX_SUCCESS; @@ -125,7 +130,7 @@ MXReturnValue inferShape(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(my_transcsr) +REGISTER_OP(my_transposecsr) .setForward(forward, "cpu") .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) @@ -135,9 +140,9 @@ REGISTER_OP(my_transcsr) /* ------------------------------------------------------------------------- */ -class MyStatefulTransCSR : public CustomStatefulOp { +class MyStatefulTransposeCSR : public CustomStatefulOp { public: - explicit MyStatefulTransCSR(int count) : count(count) {} + explicit MyStatefulTransposeCSR(int count) : count(count) {} MXReturnValue Forward(std::vector inputs, std::vector outputs, @@ -154,8 +159,6 @@ class MyStatefulTransCSR : public CustomStatefulOp { return backward(attrs, inputs, outputs, op_res); } - ~MyStatefulTransCSR() {} - private: int count; }; @@ -165,23 +168,16 @@ MXReturnValue createOpState(std::map attrs, // testing passing of keyword arguments int count = attrs.count("test_kw") > 0 ? std::stoi(attrs["test_kw"]) : 0; // creating stateful operator instance - *op_inst = new MyStatefulTransCSR(count); + *op_inst = new MyStatefulTransposeCSR(count); std::cout << "Info: stateful operator created" << std::endl; return MX_SUCCESS; } -MXReturnValue mutateInputs(std::map attrs, - std::vector &input_indices) { - // input_indices.push_back(1); // mark mutate input - return MX_SUCCESS; -} - -REGISTER_OP(state_transcsr) +REGISTER_OP(state_transposecsr) .setParseAttrs(parseAttrs) .setInferType(inferType) .setInferSType(inferSType) .setInferShape(inferShape) -.setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); MXReturnValue initialize(int version) { diff --git a/example/extensions/lib_custom_op/transrowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc similarity index 89% rename from example/extensions/lib_custom_op/transrowsp_lib.cc rename to example/extensions/lib_custom_op/transposerowsp_lib.cc index 14b8702092b1..b8541d799138 100644 --- a/example/extensions/lib_custom_op/transrowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -51,7 +51,7 @@ void transpose(MXTensor src, MXTensor dst, OpResource res) { } } - res.alloc_ndarray(B, 0, mp.size()); + res.alloc_sparse(B, 0, mp.size()); float *Bval = (float*) (B->data); int didx = 0, iidx = 0; for(auto i : mp) { @@ -67,11 +67,15 @@ MXReturnValue forward(std::map attrs, std::vector inputs, std::vector outputs, OpResource res) { - - // The data types and storage types of inputs and outputs should be the same. - if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) + // The data types and storage types of inputs and outputs should be the same. + if(inputs[0].dtype != outputs[0].dtype || inputs[0].stype != outputs[0].stype) { + std::cout << "Error! Expected all inputs and outputs to be the same type." 
+ << "Found input storage type:" << inputs[0].stype + << " Found output storage type:" << outputs[0].stype + << " Found input data type:" << inputs[0].dtype + << " Found output data type:" << outputs[0].dtype << std::endl; return MX_FAIL; - + } transpose(inputs[0], outputs[0], res); return MX_SUCCESS; } @@ -129,7 +133,7 @@ MXReturnValue inferShape(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(my_transrowsp) +REGISTER_OP(my_transposerowsp) .setForward(forward, "cpu") .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) @@ -139,9 +143,9 @@ REGISTER_OP(my_transrowsp) /* ------------------------------------------------------------------------- */ -class MyStatefulTransRowSP : public CustomStatefulOp { +class MyStatefulTransposeRowSP : public CustomStatefulOp { public: - explicit MyStatefulTransRowSP(int count) : count(count) {} + explicit MyStatefulTransposeRowSP(int count) : count(count) {} MXReturnValue Forward(std::vector inputs, std::vector outputs, @@ -158,8 +162,6 @@ class MyStatefulTransRowSP : public CustomStatefulOp { return backward(attrs, inputs, outputs, op_res); } - ~MyStatefulTransRowSP() {} - private: int count; }; @@ -169,23 +171,16 @@ MXReturnValue createOpState(std::map attrs, // testing passing of keyword arguments int count = attrs.count("test_kw") > 0 ? std::stoi(attrs["test_kw"]) : 0; // creating stateful operator instance - *op_inst = new MyStatefulTransRowSP(count); + *op_inst = new MyStatefulTransposeRowSP(count); std::cout << "Info: stateful operator created" << std::endl; return MX_SUCCESS; } -MXReturnValue mutateInputs(std::map attrs, - std::vector &input_indices) { - // input_indices.push_back(1); // mark mutate input - return MX_SUCCESS; -} - -REGISTER_OP(state_transrowsp) +REGISTER_OP(state_transposerowsp) .setParseAttrs(parseAttrs) .setInferType(inferType) .setInferSType(inferSType) .setInferShape(inferShape) -.setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); MXReturnValue initialize(int version) { diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 55c09ed92568..132faee729cc 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -241,7 +241,7 @@ enum MXReturnValue { MX_SUCCESS = 1, }; -// For sparse input, read/write the data from NDarray via pointers. +// For sparse tensors, read/write the data from NDarray via pointers. struct MXSparse { // Pointer to data. void *data{nullptr}; @@ -418,7 +418,7 @@ struct MXTensor { /*! 
\brief resource malloc function to allocate memory inside Forward/Backward functions */ typedef void* (*xpu_malloc_t)(void*, int); -typedef void (*ndarray_malloc_t)(void*, int, int, int, void**, int64_t**, int64_t**); +typedef void (*sparse_malloc_t)(void*, int, int, int, void**, int64_t**, int64_t**); #if defined(__NVCC__) typedef cudaStream_t mx_stream_t; @@ -431,17 +431,12 @@ typedef void (*ndarray_malloc_t)(void*, int, int, int, void**, int64_t**, int64_ */ class OpResource { public: - OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, - xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream) - : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), - cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream) {} - OpResource(xpu_malloc_t cpu_malloc_fp, void* cpu_alloc_fp, xpu_malloc_t gpu_malloc_fp, void* gpu_alloc_fp, void* stream, - ndarray_malloc_t ndarray_malloc_fp, void* ndarray_alloc_fp) + sparse_malloc_t sparse_malloc_fp, void* sparse_alloc_fp) : cpu_malloc(cpu_malloc_fp), gpu_malloc(gpu_malloc_fp), cpu_alloc(cpu_alloc_fp), gpu_alloc(gpu_alloc_fp), cuda_stream(stream), - ndarray_malloc(ndarray_malloc_fp), ndarray_alloc(ndarray_alloc_fp) {} + sparse_malloc(sparse_malloc_fp), sparse_alloc(sparse_alloc_fp) {} /*! \brief allocate cpu memory controlled by MXNet */ void* alloc_cpu(int size) { @@ -458,8 +453,8 @@ class OpResource { return static_cast(cuda_stream); } - void alloc_ndarray(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) { - ndarray_malloc(ndarray_alloc, index, indices_len, indptr_len, + void alloc_sparse(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) { + sparse_malloc(sparse_alloc, index, indices_len, indptr_len, &(sparse->data), &(sparse->indices), &(sparse->indptr)); } @@ -471,8 +466,8 @@ class OpResource { /*! \brief cuda stream passed from MXNet */ void *cuda_stream; - ndarray_malloc_t ndarray_malloc; - void *ndarray_alloc; + sparse_malloc_t sparse_malloc; + void *sparse_alloc; }; /*! 
@@ -960,7 +955,7 @@ typedef int (*opCallFComp_t)(fcomp_t fcomp, const char* const* keys, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, @@ -989,7 +984,7 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, @@ -1216,7 +1211,7 @@ extern "C" { if (!retval) return retval; - // copy output types + // copy output storage types for (int i = 0; i < num_out; i++) { outstypes[i] = out_stypes[i]; } @@ -1237,7 +1232,7 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, @@ -1303,7 +1298,7 @@ extern "C" { } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream, ndarray_malloc, ndarray_alloc); + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream, sparse_malloc, sparse_alloc); return fcomp(attrs, inputs, outputs, res); } @@ -1373,7 +1368,7 @@ extern "C" { size_t* outIDs, const char** outdev_type, int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, void* stream, - ndarray_malloc_t ndarray_malloc, void* ndarray_alloc, + sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, @@ -1435,7 +1430,7 @@ extern "C" { } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream, ndarray_malloc, ndarray_alloc); + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream, sparse_malloc, sparse_alloc); CustomStatefulOp* op_ptr = reinterpret_cast(state_op); if (is_forward) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 25e83c0a8707..9276813bfde9 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -195,8 +195,9 @@ void CustomFComputeDispatcher(const std::string op_name, return workspace.dptr_; }; - // create lambda that allocates memory for sparse and updates MXSparse. - auto ndarray_alloc = [&](int index, int indices_len, int idxptr_len, + // create lambda that allocates memory for sparse and + // returns allocated arrays for data, indices and indptr. 
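  // An idxptr_len of 0 selects the row-sparse branch (indices only); a non-zero idxptr_len
  // selects the CSR branch, which additionally allocates and returns the indptr buffer.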
+ auto sparse_alloc = [&](int index, int indices_len, int idxptr_len, void** data, int64_t** indices, int64_t** indptr) { // Row Sparse if(idxptr_len == 0) { @@ -229,11 +230,11 @@ void CustomFComputeDispatcher(const std::string op_name, return static_cast((*gpualloc)(size)); }; - typedef decltype(ndarray_alloc) alloc_type_ndarray; - auto ndarray_malloc = [](void* _ndarray_alloc, int index, int indices_len, int idxptr_len, + typedef decltype(sparse_alloc) alloc_type_sparse; + auto sparse_malloc = [](void* _sparse_alloc, int index, int indices_len, int idxptr_len, void** data, int64_t** indices, int64_t** indptr) { - alloc_type_ndarray* ndarrayalloc = static_cast(_ndarray_alloc); - (*ndarrayalloc)(index, indices_len, idxptr_len, data, indices, indptr); + alloc_type_sparse* sparsealloc = static_cast(_sparse_alloc); + (*sparsealloc)(index, indices_len, idxptr_len, data, indices, indptr); }; // get actual cudaStream_t out of mxnet gpu stream and pass to lib_api.h @@ -263,7 +264,7 @@ void CustomFComputeDispatcher(const std::string op_name, out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - ndarray_malloc, &ndarray_alloc, in_stypes.data(), out_stypes.data(), + sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) @@ -286,7 +287,7 @@ void CustomFComputeDispatcher(const std::string op_name, out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - ndarray_malloc, &ndarray_alloc, in_stypes.data(), out_stypes.data(), + sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) @@ -757,8 +758,8 @@ int MXLoadLib(const char *path) { regOp.set_num_inputs(num_inputs); regOp.set_num_outputs(num_outputs); regOp.set_attr("FInferType", infer_type, plevel); - regOp.set_attr("FInferShape", infer_shape, plevel); regOp.set_attr("FInferStorageType", infer_storage_type, plevel); + regOp.set_attr("FInferShape", infer_shape, plevel); regOp.set_attr("FResourceRequest", resc_req, plevel); // optionally add fmutate inputs if user specified a function if (mutate_fp != nullptr) From bd400982be31b8ae2883f7a123dcec2466f5403e Mon Sep 17 00:00:00 2001 From: guanxinq Date: Mon, 16 Mar 2020 18:08:50 +0000 Subject: [PATCH 20/26] Set inferSType as optional function --- example/extensions/lib_custom_op/gemm_lib.cc | 18 ---------- example/extensions/lib_custom_op/relu_lib.cu | 9 ----- .../lib_custom_op/transposecsr_lib.cc | 8 +++-- src/c_api/c_api.cc | 35 ++++++++++++------- 4 files changed, 28 insertions(+), 42 deletions(-) diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index c34c0ac84df8..daeac337f4d6 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -140,22 +140,6 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } -MXReturnValue inferSType(std::map attrs, - std::vector &instypes, - std::vector &outstypes) { - // validate inputs - for (unsigned i = 0; i < instypes.size(); i++) { - if 
(instypes[i] != kDefaultStorage) { - std::cout << "Expected input " << i << " to have dense storage type" << std::endl; - return MX_FAIL; - } - } - for (unsigned i = 0; i < instypes.size(); i++) { - outstypes[i] = instypes[0]; - } - return MX_SUCCESS; -} - MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -187,7 +171,6 @@ REGISTER_OP(my_gemm) .setBackward(backward, "cpu") .setParseAttrs(parseAttrs) .setInferType(inferType) -.setInferSType(inferSType) .setInferShape(inferShape); /* ------------------------------------------------------------------------- */ @@ -236,7 +219,6 @@ MXReturnValue mutateInputs(std::map attrs, REGISTER_OP(state_gemm) .setParseAttrs(parseAttrs) .setInferType(inferType) -.setInferSType(inferSType) .setInferShape(inferShape) .setMutateInputs(mutateInputs) .setCreateOpState(createOpState, "cpu"); diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index ee57ce38a429..3beb68c20fa7 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -109,13 +109,6 @@ MXReturnValue inferType(std::map attrs, return MX_SUCCESS; } -MXReturnValue inferSType(std::map attrs, - std::vector &instypes, - std::vector &outstypes) { - outstypes[0] = instypes[0]; - return MX_SUCCESS; -} - MXReturnValue inferShape(std::map attrs, std::vector> &inshapes, std::vector> &outshapes) { @@ -126,7 +119,6 @@ MXReturnValue inferShape(std::map attrs, REGISTER_OP(my_relu) .setParseAttrs(parseAttrs) .setInferType(inferType) -.setInferSType(inferSType) .setInferShape(inferShape) .setForward(forwardCPU, "cpu") .setForward(forwardGPU, "gpu") @@ -184,7 +176,6 @@ MXReturnValue createOpStateGPU(std::map attrs, REGISTER_OP(my_state_relu) .setParseAttrs(parseAttrs) .setInferType(inferType) -.setInferSType(inferSType) .setInferShape(inferShape) .setCreateOpState(createOpStateCPU, "cpu") .setCreateOpState(createOpStateGPU, "gpu"); diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index eb1cec3a9eff..e7beb047d21d 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -34,22 +34,26 @@ void transpose(MXTensor src, MXTensor dst, OpResource res) { int64_t w = shape[1]; if(src.stype == kCSRStorage) { float *Aval = (float*) (A->data); + // Here we need one more element to help calculate index(line 57). std::vector rowPtr(w + 2, 0); // count column for(int i = 0; i < A->data_len; i++) { rowPtr[A->indices[i] + 2]++; } - // Accumulated sum + // Accumulated sum. After this for loop, rowPtr[1:w+2) stores the correct + // result of transposed rowPtr. for(int i = 2; i < rowPtr.size(); i++) { rowPtr[i] += rowPtr[i - 1]; } - + // Alloc memory for sparse data, where 0 is the index // of B in output vector. res.alloc_sparse(B, 0, A->data_len, w + 1); float *Bval = (float*) (B->data); for(int i = 0; i < h; i++) { for(int j = A->indptr[i]; j < A->indptr[i + 1]; j++) { + // Helps calculate index and after that rowPtr[0:w+1) stores the + // correct result of transposed rowPtr. 
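        // rowPtr[A->indices[j] + 1] is the next free slot in that column's output row; the
        // post-increment advances it, and because input rows are visited in order, the
        // indices of each output row come out already sorted.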
int index = rowPtr[A->indices[j] + 1]++; Bval[index] = Aval[j]; B->indices[index] = i; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9276813bfde9..9176e577bc48 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -413,8 +413,6 @@ int MXLoadLib(const char *path) { << "' custom op, Forward or CreateOpState function was not set."; CHECK(type_fp != nullptr) << "Error loading '" << name << "' custom op, InferType function was not set."; - CHECK(stype_fp != nullptr) << "Error loading '" << name - << "' custom op, InferSType function was not set."; CHECK(shape_fp != nullptr) << "Error loading '" << name << "' custom op, InferShape function was not set."; } else { @@ -657,18 +655,29 @@ int MXLoadLib(const char *path) { // output types will be populated by inferType function std::vector outstypes(out_stypes->size()); - CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - instypes.data(), in_stypes->size(), - outstypes.data(), out_stypes->size())) - << "Error calling InferSType for custom operator '" << name_str << "'"; - - // copy and assign output storage types from custom op to MXNet memory. - for (size_t i = 0; i < out_stypes->size(); i++) { - STORAGE_TYPE_ASSIGN_CHECK(*out_stypes, i, outstypes[i]); + // InferSType is not defineid in customized lib. + if (stype_fp == nullptr) { + CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) + << "Error input tensors are not dense for custom operator '" << name_str << "'"; + // set outputs as dense + return op::storage_type_assign(out_stypes, mxnet::kDefaultStorage, + dispatch_mode, DispatchMode::kFComputeEx); + } + // InferSType is defineid in customized lib. + else { + CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + instypes.data(), in_stypes->size(), + outstypes.data(), out_stypes->size())) + << "Error calling InferSType for custom operator '" << name_str << "'"; + + // copy and assign output storage types from custom op to MXNet memory. 
+ for (size_t i = 0; i < out_stypes->size(); i++) { + STORAGE_TYPE_ASSIGN_CHECK(*out_stypes, i, outstypes[i]); + } + // assign dispatch mode + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + return true; } - // assign dispatch mode - DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); - return true; }; // FGradient register lambda From 7e95dcac0064e4be92080cf8c7450cac8a91c106 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 17 Mar 2020 17:37:37 +0000 Subject: [PATCH 21/26] Resolve comments --- .../lib_custom_op/transposecsr_lib.cc | 2 ++ .../lib_custom_op/transposerowsp_lib.cc | 5 ++++ include/mxnet/lib_api.h | 3 ++- src/c_api/c_api.cc | 25 +++++++++---------- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index e7beb047d21d..79c6f3419eb7 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -116,6 +116,8 @@ MXReturnValue inferType(std::map attrs, MXReturnValue inferSType(std::map attrs, std::vector &instypes, std::vector &outstypes) { + if (instypes[0] != kCSRStorage) + return MX_FAIL; outstypes[0] = instypes[0]; return MX_SUCCESS; } diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index b8541d799138..aa20b24ea7d6 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -34,6 +34,8 @@ void transpose(MXTensor src, MXTensor dst, OpResource res) { int64_t h = shape[0]; int64_t w = shape[1]; if(src.stype == kRowSparseStorage) { + // Keys of the map is the row index of transposed tensors. + // Values of the map is the rows which have non-zero elements. std::map> mp; float *Aval = (float*) (A->data); for(int i = 0; i < A->data_len; i++) { @@ -51,6 +53,7 @@ void transpose(MXTensor src, MXTensor dst, OpResource res) { } } + // Alloc memory for output tensors. res.alloc_sparse(B, 0, mp.size()); float *Bval = (float*) (B->data); int didx = 0, iidx = 0; @@ -115,6 +118,8 @@ MXReturnValue inferType(std::map attrs, MXReturnValue inferSType(std::map attrs, std::vector &instypes, std::vector &outstypes) { + if (instypes[0] != kRowSparseStorage) + return MX_FAIL; outstypes[0] = instypes[0]; return MX_SUCCESS; } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 132faee729cc..1b0ca053c118 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -465,8 +465,9 @@ class OpResource { void *cpu_alloc, *gpu_alloc; /*! \brief cuda stream passed from MXNet */ void *cuda_stream; - + /*! \brief sparse allocation lambda function */ sparse_malloc_t sparse_malloc; + /*! 
\brief lambda function to return allocated sparse memory handle */ void *sparse_alloc; }; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9176e577bc48..f70062e1272e 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -643,18 +643,6 @@ int MXLoadLib(const char *path) { DispatchMode* dispatch_mode, std::vector* in_stypes, std::vector* out_stypes) { - // convert attributes to vector of char* - std::vector attr_keys, attr_vals; - for (auto kv : attrs.dict) { - attr_keys.push_back(kv.first.c_str()); - attr_vals.push_back(kv.second.c_str()); - } - // copy input types from in_stype - std::vector instypes(*in_stypes); - - // output types will be populated by inferType function - std::vector outstypes(out_stypes->size()); - // InferSType is not defineid in customized lib. if (stype_fp == nullptr) { CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) @@ -663,8 +651,19 @@ int MXLoadLib(const char *path) { return op::storage_type_assign(out_stypes, mxnet::kDefaultStorage, dispatch_mode, DispatchMode::kFComputeEx); } - // InferSType is defineid in customized lib. + // InferSType is defined in customized lib. else { + // convert attributes to vector of char* + std::vector attr_keys, attr_vals; + for (auto kv : attrs.dict) { + attr_keys.push_back(kv.first.c_str()); + attr_vals.push_back(kv.second.c_str()); + } + // copy input types from in_stype + std::vector instypes(*in_stypes); + + // output types will be populated by inferType function + std::vector outstypes(out_stypes->size()); CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), instypes.data(), in_stypes->size(), outstypes.data(), out_stypes->size())) From 3f963f5f28230ff823a1ae8e0e1e63c7e0e2b8bc Mon Sep 17 00:00:00 2001 From: guanxinq Date: Tue, 17 Mar 2020 19:43:47 +0000 Subject: [PATCH 22/26] Add error messages --- example/extensions/lib_custom_op/transposecsr_lib.cc | 4 +++- example/extensions/lib_custom_op/transposerowsp_lib.cc | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index 79c6f3419eb7..6363e2d64cf7 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -116,8 +116,10 @@ MXReturnValue inferType(std::map attrs, MXReturnValue inferSType(std::map attrs, std::vector &instypes, std::vector &outstypes) { - if (instypes[0] != kCSRStorage) + if (instypes[0] != kCSRStorage) { + std::cout << "Expected storage type is kCSRStorage" << std::endl; return MX_FAIL; + } outstypes[0] = instypes[0]; return MX_SUCCESS; } diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index aa20b24ea7d6..29a8721ad727 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -118,8 +118,10 @@ MXReturnValue inferType(std::map attrs, MXReturnValue inferSType(std::map attrs, std::vector &instypes, std::vector &outstypes) { - if (instypes[0] != kRowSparseStorage) + if (instypes[0] != kRowSparseStorage) { + std::cout << "Expected storage type is kRowSparseStorage" << std::endl; return MX_FAIL; + } outstypes[0] = instypes[0]; return MX_SUCCESS; } From 0eb1de989df5047fe44701c7dd7cd109b12e527f Mon Sep 17 00:00:00 2001 From: guanxinq Date: Wed, 18 Mar 2020 17:32:15 +0000 Subject: [PATCH 23/26] Resolve comments --- 
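Note for reviewers (not part of the commit): the sketch below condenses how a library author
is expected to use the sparse additions from this series, assuming lib_api.h as patched here.
The operator name my_csr_scale and the parseAttrs/inferType/inferShape helpers it chains are
hypothetical placeholders in the style of the gemm and transpose examples; only the MXSparse
access and the OpResource::alloc_sparse call reflect the new API.

// assumes #include "lib_api.h" from this series
MXReturnValue forward(std::map<std::string, std::string> attrs,
                      std::vector<MXTensor> inputs,
                      std::vector<MXTensor> outputs,
                      OpResource res) {
  if (inputs[0].stype != kCSRStorage || outputs[0].stype != kCSRStorage) {
    std::cout << "Expected CSR input and output" << std::endl;
    return MX_FAIL;
  }
  MXSparse* A = reinterpret_cast<MXSparse*>(inputs[0].data_ptr);
  MXSparse* B = reinterpret_cast<MXSparse*>(outputs[0].data_ptr);
  // Output keeps the input sparsity pattern: same non-zero count, same rows+1 indptr.
  res.alloc_sparse(B, 0, A->data_len, A->indptr_len);
  float* Aval = reinterpret_cast<float*>(A->data);
  float* Bval = reinterpret_cast<float*>(B->data);
  for (int64_t i = 0; i < A->data_len; i++)
    Bval[i] = 2 * Aval[i];                     // scale the non-zero values
  for (int64_t i = 0; i < A->indices_len; i++)
    B->indices[i] = A->indices[i];             // copy column indices
  for (int64_t i = 0; i < A->indptr_len; i++)
    B->indptr[i] = A->indptr[i];               // copy row pointers
  return MX_SUCCESS;
}

REGISTER_OP(my_csr_scale)
.setForward(forward, "cpu")
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferSType(inferSType)   // declares CSR storage for input and output
.setInferShape(inferShape);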
example/extensions/lib_custom_op/test_transposecsr.py | 4 ++-- .../extensions/lib_custom_op/test_transposerowsp.py | 5 ++--- example/extensions/lib_custom_op/transposecsr_lib.cc | 10 ++++------ example/extensions/lib_custom_op/transposerowsp_lib.cc | 10 ++++------ include/mxnet/lib_api.h | 2 ++ 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/example/extensions/lib_custom_op/test_transposecsr.py b/example/extensions/lib_custom_op/test_transposecsr.py index be2c470f90aa..8421415d09f6 100644 --- a/example/extensions/lib_custom_op/test_transposecsr.py +++ b/example/extensions/lib_custom_op/test_transposecsr.py @@ -49,7 +49,7 @@ print("indptr:", b.indptr.asnumpy()) print("Stateful Compute Result:") -c = mx.nd.state_transposecsr(a, test_kw=100) +c = mx.nd.my_state_transposecsr(a, test_kw=100) print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) print("indptr:", c.indptr.asnumpy()) @@ -57,7 +57,7 @@ print("--------start symbolic compute--------") d = mx.sym.Variable('d') e = mx.sym.my_transposecsr(d) -f = mx.sym.state_transposecsr(d, test_kw=200) +f = mx.sym.my_state_transposecsr(d, test_kw=200) exe = e.bind(ctx=mx.cpu(),args={'d':a}) exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) diff --git a/example/extensions/lib_custom_op/test_transposerowsp.py b/example/extensions/lib_custom_op/test_transposerowsp.py index 8c00095a3589..a4e513525334 100644 --- a/example/extensions/lib_custom_op/test_transposerowsp.py +++ b/example/extensions/lib_custom_op/test_transposerowsp.py @@ -35,7 +35,6 @@ mx.library.load(path) a = mx.nd.array([[1,2,3],[0,0,0],[4,0,5],[0,0,0],[0,0,0]]) -# a = mx.nd.array([[1,3,0,2,1],[0,1,0,0,0],[0,2,4,5,3]]) a = a.tostype('row_sparse') print("--------Input CSR Array---------") print("data:", a.data.asnumpy()) @@ -48,14 +47,14 @@ print("indices:", b.indices.asnumpy()) print("Stateful Compute Result:") -c = mx.nd.state_transposerowsp(a, test_kw=100) +c = mx.nd.my_state_transposerowsp(a, test_kw=100) print("data:", c.data.asnumpy()) print("indices:", c.indices.asnumpy()) print("--------start symbolic compute--------") d = mx.sym.Variable('d') e = mx.sym.my_transposerowsp(d) -f = mx.sym.state_transposerowsp(d, test_kw=200) +f = mx.sym.my_state_transposerowsp(d, test_kw=200) exe = e.bind(ctx=mx.cpu(),args={'d':a}) exe2 = f.bind(ctx=mx.cpu(),args={'d':a}) diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index 6363e2d64cf7..0daeb3e9f83e 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -102,11 +102,9 @@ MXReturnValue inferType(std::map attrs, std::cout << "Expected 1 inputs to inferType" << std::endl; return MX_FAIL; } - for (unsigned i = 0; i < intypes.size(); i++) { - if (intypes[i] != kFloat32) { - std::cout << "Expected input " << i << " to have float32 type" << std::endl; - return MX_FAIL; - } + if (intypes[0] != kFloat32) { + std::cout << "Expected input to have float32 type" << std::endl; + return MX_FAIL; } outtypes[0] = intypes[0]; @@ -181,7 +179,7 @@ MXReturnValue createOpState(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(state_transposecsr) +REGISTER_OP(my_state_transposecsr) .setParseAttrs(parseAttrs) .setInferType(inferType) .setInferSType(inferSType) diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index 29a8721ad727..883d816cfa81 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ 
b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -104,11 +104,9 @@ MXReturnValue inferType(std::map attrs, std::cout << "Expected 1 inputs to inferType" << std::endl; return MX_FAIL; } - for (unsigned i = 0; i < intypes.size(); i++) { - if (intypes[i] != kFloat32) { - std::cout << "Expected input " << i << " to have float32 type" << std::endl; - return MX_FAIL; - } + if (intypes[0] != kFloat32) { + std::cout << "Expected input to have float32 type" << std::endl; + return MX_FAIL; } outtypes[0] = intypes[0]; @@ -183,7 +181,7 @@ MXReturnValue createOpState(std::map attrs, return MX_SUCCESS; } -REGISTER_OP(state_transposerowsp) +REGISTER_OP(my_state_transposerowsp) .setParseAttrs(parseAttrs) .setInferType(inferType) .setInferSType(inferSType) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 1b0ca053c118..7705b964b389 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -453,6 +453,7 @@ class OpResource { return static_cast(cuda_stream); } + /*! \brief allocate sparse memory controlled by MXNet */ void alloc_sparse(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) { sparse_malloc(sparse_alloc, index, indices_len, indptr_len, &(sparse->data), &(sparse->indices), &(sparse->indptr)); @@ -991,6 +992,7 @@ typedef int (*opCallFStatefulComp_t)(int is_forward, void* state_op, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes); + #define MXLIB_PARTREGSIZE_STR "_partRegSize" typedef int (*partRegSize_t)(void); From 79d7d642119082066f3836d2694ebb0d35adeeef Mon Sep 17 00:00:00 2001 From: guanxinq Date: Wed, 18 Mar 2020 22:00:10 +0000 Subject: [PATCH 24/26] verify transpose ops results --- example/extensions/lib_custom_op/test_transposecsr.py | 2 ++ example/extensions/lib_custom_op/test_transposerowsp.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/example/extensions/lib_custom_op/test_transposecsr.py b/example/extensions/lib_custom_op/test_transposecsr.py index 8421415d09f6..37d066a7bec2 100644 --- a/example/extensions/lib_custom_op/test_transposecsr.py +++ b/example/extensions/lib_custom_op/test_transposecsr.py @@ -74,3 +74,5 @@ print("indices:", out2[0].indices.asnumpy()) print("indptr:", out2[0].indptr.asnumpy()) +print("--------Baseline(dense)--------") +print(mx.nd.transpose(a.tostype('default'))) diff --git a/example/extensions/lib_custom_op/test_transposerowsp.py b/example/extensions/lib_custom_op/test_transposerowsp.py index a4e513525334..cea62ec6e98c 100644 --- a/example/extensions/lib_custom_op/test_transposerowsp.py +++ b/example/extensions/lib_custom_op/test_transposerowsp.py @@ -68,3 +68,6 @@ print("Stateful Compute Result:") print("data:", out2[0].data.asnumpy()) print("indices:", out2[0].indices.asnumpy()) + +print("--------Baseline(dense)--------") +print(mx.nd.transpose(a.tostype('default'))) From 9dcb604a8a6f5f0cb7f1dcf120d11aa2ecca01c8 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Thu, 19 Mar 2020 16:59:49 +0000 Subject: [PATCH 25/26] fix sanity check --- include/mxnet/lib_api.h | 112 ++++++++++++++++++++-------------------- src/c_api/c_api.cc | 45 ++++++++-------- 2 files changed, 76 insertions(+), 81 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index fa08ab64ec29..a55d25994421 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -220,7 +220,7 @@ enum MXDType { enum MXStorageType { // dense kDefaultStorage = 0, - // row sparse + // row sparse kRowSparseStorage = 1, // 
csr kCSRStorage = 2, @@ -255,7 +255,7 @@ struct MXSparse { int64_t indices_len; // For CSR, indptr gives the start and end index of data for each row. - // For row sparse, indptr is not used. + // For row sparse, indptr is not used. int64_t* indptr = nullptr; int64_t indptr_len; @@ -265,16 +265,16 @@ struct MXSparse { // If CSR, num of non-zero elemets is num_idx, // If row sparse, num of elements is num_idx * width. data_len = num_idx; - if(!idx_ptr) { - for(int i = 1; i < ndims; ++i) + if (!idx_ptr) { + for (int i = 1; i < ndims; ++i) data_len *= dims[i]; } - indices = (int64_t*)idx; + indices = reinterpret_cast(idx); indices_len = num_idx; - if(idx_ptr) { - indptr = (int64_t*)idx_ptr; + if (idx_ptr) { + indptr = reinterpret_cast(idx_ptr); indptr_len = num_idx_ptr; } } @@ -387,7 +387,7 @@ struct MXTensor { verID == oth.verID && ctx.dev_type == oth.ctx.dev_type && ctx.dev_id == oth.ctx.dev_id && - shape == oth.shape && + shape == oth.shape && stype == oth.stype; } @@ -455,7 +455,7 @@ class OpResource { /*! \brief allocate sparse memory controlled by MXNet */ void alloc_sparse(MXSparse* sparse, int index, int indices_len, int indptr_len = 0) { - sparse_malloc(sparse_alloc, index, indices_len, indptr_len, + sparse_malloc(sparse_alloc, index, indices_len, indptr_len, &(sparse->data), &(sparse->indices), &(sparse->indptr)); } @@ -700,8 +700,8 @@ typedef MXReturnValue (*createOpState_t)(std::map, class CustomOp { public: explicit CustomOp(const char* op_name) : name(op_name), - parse_attrs(NULL), infer_type(NULL), infer_storage_type(NULL), - infer_shape(NULL), mutate_inputs(NULL), isSGop(false) {} + parse_attrs(NULL), infer_type(NULL), infer_storage_type(NULL), infer_shape(NULL), + mutate_inputs(NULL), isSGop(false) {} CustomOp& setForward(fcomp_t fcomp, const char* ctx) { if (forward_ctx_map.count(ctx) > 0) raiseDuplicateContextError(); @@ -1186,13 +1186,13 @@ extern "C" { return retval; } - /*! \brief returns status of calling inferSType function for operator from library */ + /*! \brief returns status of calling inferSType function for operator from library */ #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) __declspec(dllexport) int __cdecl #else int #endif - _opCallInferSType(inferSType_t inferSType, const char* const* keys, + _opCallInferSType(inferSType_t inferSType, const char* const* keys, const char* const* vals, int num, int* instypes, int num_in, int* outstypes, int num_out) { // create map of attributes from list @@ -1239,7 +1239,7 @@ extern "C" { sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, - int64_t* in_indices_shapes, int64_t* out_indices_shapes, + int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { // create map of attributes from list std::map attrs; @@ -1254,24 +1254,22 @@ extern "C" { for (int i = 0; i < num_in; i++) { // Dense representation. - if(instypes[i] == 0) { + if (instypes[i] == 0) { inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); - } - // Sparse representation. - else { + } else { + // Sparse representation. 
MXStorageType type; - if(instypes[i] == 1) { + if (instypes[i] == 1) { type = kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); - } - else { + } else { type = kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } - inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, type); + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (MXDType)intypes[i], + inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, type); } } @@ -1281,28 +1279,29 @@ extern "C" { for (int i = 0; i < num_out; i++) { // Dense representation. - if(outstypes[i] == 0) { + if (outstypes[i] == 0) { outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); - } - // Sparse representation. - else { + } else { + // Sparse representation. MXStorageType type; - if(outstypes[i] == 1) { + if (outstypes[i] == 1) { type = kRowSparseStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); - } - else { + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], + out_indices[i], out_indices_shapes[i]); + } else { type = kCSRStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, type); + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (MXDType)outtypes[i], + outshapes[i], outdims[i], outIDs[i], {outdev_type[i], + outdev_id[i]}, type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, cuda_stream, sparse_malloc, sparse_alloc); + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + cuda_stream, sparse_malloc, sparse_alloc); return fcomp(attrs, inputs, outputs, res); } @@ -1377,32 +1376,30 @@ extern "C" { void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, int64_t* in_indptr_shapes, int64_t* out_indptr_shapes) { - // create a vector of tensors for inputs std::vector inputs(num_in); // create a vector for sparse inputs std::vector in_sparse(num_in); for (int i = 0; i < num_in; i++) { - // Dense representation. - if(instypes[i] == 0) { + if (instypes[i] == 0) { + // Dense representation. inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], {indev_type[i], indev_id[i]}, kDefaultStorage); - } - // Sparse representation. - else { + } else { + // Sparse representation. MXStorageType type; - if(instypes[i] == 1) { + if (instypes[i] == 1) { type = kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); - } - else { + } else { type = kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } - inputs[i].setTensor((void*)(&in_sparse[i]), (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], {indev_type[i], indev_id[i]}, type); + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (MXDType)intypes[i], + inshapes[i], indims[i], inIDs[i], {indev_type[i], + indev_id[i]}, type); } } @@ -1412,29 +1409,30 @@ extern "C" { std::vector out_sparse(num_out); for (int i = 0; i < num_out; i++) { - // Dense representation. 
- if(outstypes[i] == 0) { + if (outstypes[i] == 0) { + // Dense representation. outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], {outdev_type[i], outdev_id[i]}, kDefaultStorage); - } - // Sparse representation. - else { + } else { + // Sparse representation. MXStorageType type; - if(outstypes[i] == 1) { + if (outstypes[i] == 1) { type = kRowSparseStorage; - out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); - } - else { + out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], + out_indices_shapes[i]); + } else { type = kCSRStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor((void*)(&out_sparse[i]), (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], {outdev_type[i], outdev_id[i]}, type); + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (MXDType)outtypes[i], + outshapes[i], outdims[i], outIDs[i], {outdev_type[i], + outdev_id[i]}, type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream, sparse_malloc, sparse_alloc); + OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + stream, sparse_malloc, sparse_alloc); CustomStatefulOp* op_ptr = reinterpret_cast(state_op); if (is_forward) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 7af094fd8a34..911ac90e9391 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -138,13 +138,12 @@ void CustomFComputeDispatcher(const std::string op_name, const char* ctx_str = inputs[i].ctx().dev_mask() == Context::kCPU ? "cpu" : "gpu"; in_dev_type.push_back(ctx_str); in_dev_id.push_back(inputs[i].ctx().real_dev_id()); - - if(inputs[i].storage_type() == mxnet::kRowSparseStorage) { + + if (inputs[i].storage_type() == mxnet::kRowSparseStorage) { in_stypes[i] = 1; in_indices[i] = inputs[i].aux_data(rowsparse::kIdx).dptr_; in_indices_shapes[i] = inputs[i].aux_shape(rowsparse::kIdx).Size(); - } - else if(inputs[i].storage_type() == mxnet::kCSRStorage) { + } else if (inputs[i].storage_type() == mxnet::kCSRStorage) { in_stypes[i] = 2; in_indices[i] = inputs[i].aux_data(csr::kIdx).dptr_; in_indptr[i] = inputs[i].aux_data(csr::kIndPtr).dptr_; @@ -163,12 +162,11 @@ void CustomFComputeDispatcher(const std::string op_name, out_dev_type.push_back(ctx_str); out_dev_id.push_back(outputs[i].ctx().real_dev_id()); - if(outputs[i].storage_type() == mxnet::kRowSparseStorage) { + if (outputs[i].storage_type() == mxnet::kRowSparseStorage) { out_stypes[i] = 1; out_indices[i] = outputs[i].aux_data(rowsparse::kIdx).dptr_; out_indices_shapes[i] = outputs[i].aux_shape(rowsparse::kIdx).Size(); - } - else if(outputs[i].storage_type() == mxnet::kCSRStorage) { + } else if (outputs[i].storage_type() == mxnet::kCSRStorage) { out_stypes[i] = 2; out_indices[i] = outputs[i].aux_data(csr::kIdx).dptr_; out_indptr[i] = outputs[i].aux_data(csr::kIndPtr).dptr_; @@ -199,18 +197,17 @@ void CustomFComputeDispatcher(const std::string op_name, // returns allocated arrays for data, indices and indptr. 
auto sparse_alloc = [&](int index, int indices_len, int idxptr_len, void** data, int64_t** indices, int64_t** indptr) { - // Row Sparse - if(idxptr_len == 0) { + if (idxptr_len == 0) { + // Row Sparse outputs[index].CheckAndAlloc({mshadow::Shape1(indices_len)}); *data = outputs[index].data().dptr_; - *indices = (int64_t*)outputs[index].aux_data(rowsparse::kIdx).dptr_; - } - // CSR - else { + *indices = reinterpret_cast(outputs[index].aux_data(rowsparse::kIdx).dptr_); + } else { + // CSR outputs[index].CheckAndAlloc({mshadow::Shape1(idxptr_len), mshadow::Shape1(indices_len)}); *data = outputs[index].data().dptr_; - *indices = (int64_t*)outputs[index].aux_data(csr::kIdx).dptr_; - *indptr = (int64_t*)outputs[index].aux_data(csr::kIndPtr).dptr_; + *indices = reinterpret_cast(outputs[index].aux_data(csr::kIdx).dptr_); + *indptr = reinterpret_cast(outputs[index].aux_data(csr::kIndPtr).dptr_); } }; @@ -231,7 +228,7 @@ void CustomFComputeDispatcher(const std::string op_name, }; typedef decltype(sparse_alloc) alloc_type_sparse; - auto sparse_malloc = [](void* _sparse_alloc, int index, int indices_len, int idxptr_len, + auto sparse_malloc = [](void* _sparse_alloc, int index, int indices_len, int idxptr_len, void** data, int64_t** indices, int64_t** indptr) { alloc_type_sparse* sparsealloc = static_cast(_sparse_alloc); (*sparsealloc)(index, indices_len, idxptr_len, data, indices, indptr); @@ -263,10 +260,10 @@ void CustomFComputeDispatcher(const std::string op_name, in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), - cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, + cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FCompute for custom operator '" << op_name << "'"; } @@ -288,7 +285,8 @@ void CustomFComputeDispatcher(const std::string op_name, out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices.data(), out_indices.data(), + in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data())) << "Error calling FStatefulCompute for custom operator '" << op_name << "'"; @@ -644,16 +642,15 @@ int MXLoadLib(const char *path) { DispatchMode* dispatch_mode, std::vector* in_stypes, std::vector* out_stypes) { - // InferSType is not defineid in customized lib. if (stype_fp == nullptr) { + // InferSType is not defineid in customized lib. CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) << "Error input tensors are not dense for custom operator '" << name_str << "'"; // set outputs as dense return op::storage_type_assign(out_stypes, mxnet::kDefaultStorage, dispatch_mode, DispatchMode::kFComputeEx); - } - // InferSType is defined in customized lib. - else { + } else { + // InferSType is defined in customized lib. 
// convert attributes to vector of char* std::vector attr_keys, attr_vals; for (auto kv : attrs.dict) { From 7f39b85bb7c95a1594d5352cef01e7b322dd57f4 Mon Sep 17 00:00:00 2001 From: guanxinq Date: Fri, 20 Mar 2020 06:17:36 +0000 Subject: [PATCH 26/26] update MX_LIBRARY_VERSION to 5 --- include/mxnet/lib_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 8de93a07de61..fd526ee4172f 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -39,7 +39,7 @@ #include #include -#define MX_LIBRARY_VERSION 4 +#define MX_LIBRARY_VERSION 5 /*! * \brief For loading multiple custom op libraries in Linux, exporting same symbol multiple
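
Taken together, these patches let an externally compiled operator accept and produce sparse NDArrays: each MXTensor now carries an MXStorageType, its data_ptr points at an MXSparse chunk when the storage is row-sparse or CSR, and OpResource::alloc_sparse asks MXNet to allocate the output's data, indices, and (for CSR) indptr buffers. The sketch below shows how a minimal storage-preserving operator could sit on top of that API. It is illustrative only: the operator name my_sparse_identity is made up, float32 data is assumed (as the transpose examples require), and the inferSType/forward signatures follow the example libraries in this series rather than being quoted from lib_api.h.

    #include "lib_api.h"  // custom-op extension header, included as in the example libraries

    // Propagate the input storage type (kDefaultStorage, kRowSparseStorage or
    // kCSRStorage) to the single output, so MXNet dispatches sparse outputs.
    MXReturnValue inferSType(std::map<std::string, std::string> attrs,
                             std::vector<int> &instypes,
                             std::vector<int> &outstypes) {
      outstypes[0] = instypes[0];
      return MX_SUCCESS;
    }

    MXReturnValue forward(std::map<std::string, std::string> attrs,
                          std::vector<MXTensor> inputs,
                          std::vector<MXTensor> outputs,
                          OpResource res) {
      if (inputs[0].dtype != kFloat32)
        return MX_FAIL;
      if (inputs[0].stype == kDefaultStorage)
        return MX_FAIL;  // this sketch only handles the sparse path

      // For sparse tensors, data_ptr points at an MXSparse chunk.
      MXSparse *in  = reinterpret_cast<MXSparse *>(inputs[0].data_ptr);
      MXSparse *out = reinterpret_cast<MXSparse *>(outputs[0].data_ptr);

      // Let MXNet allocate the output buffers before writing to them;
      // the second argument is the output index, indptr_len only matters for CSR.
      if (inputs[0].stype == kRowSparseStorage) {
        res.alloc_sparse(out, 0, in->indices_len);
      } else {  // kCSRStorage
        res.alloc_sparse(out, 0, in->indices_len, in->indptr_len);
        for (int64_t i = 0; i < in->indptr_len; i++)
          out->indptr[i] = in->indptr[i];
      }

      // Identity copy of values and indices; a real operator such as
      // my_transposecsr would instead recompute data/indices/indptr for the
      // transposed layout.
      float *src = static_cast<float *>(in->data);
      float *dst = static_cast<float *>(out->data);
      for (int64_t i = 0; i < in->data_len; i++)
        dst[i] = src[i];
      for (int64_t i = 0; i < in->indices_len; i++)
        out->indices[i] = in->indices[i];
      return MX_SUCCESS;
    }

Registration follows the same chain as the transpose examples, e.g. REGISTER_OP(my_sparse_identity).setParseAttrs(...).setInferType(...).setInferSType(inferSType).setForward(forward, "cpu"); the parse/type/shape callbacks are omitted above for brevity. The updated test scripts in this series also show the verification pattern for such operators: convert the input with tostype('csr') or tostype('row_sparse'), run the custom op imperatively and symbolically, and compare the result against mx.nd.transpose of the array converted back to dense storage.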