diff --git a/R-package/src/base.h b/R-package/src/base.h index 8645d8576b0e..6d15b8b1a0ce 100644 --- a/R-package/src/base.h +++ b/R-package/src/base.h @@ -354,8 +354,8 @@ inline std::vector SafeGetListNames(const Rcpp::List& src) { * \param rshape The dimension in R * \return A internal vector representation of shapes in mxnet. */ -inline std::vector Dim2InternalShape(const Rcpp::Dimension &rshape) { - std::vector shape(rshape.size()); +inline std::vector Dim2InternalShape(const Rcpp::Dimension &rshape) { + std::vector shape(rshape.size()); for (size_t i = 0; i < rshape.size(); ++i) { shape[rshape.size() - i - 1] = rshape[i]; } diff --git a/R-package/src/ndarray.cc b/R-package/src/ndarray.cc index 94d24f3fb46b..fdcac7e5079f 100644 --- a/R-package/src/ndarray.cc +++ b/R-package/src/ndarray.cc @@ -180,7 +180,7 @@ Rcpp::RObject NDArrayPacker::CreateNDArrayPacker() { Rcpp::Dimension NDArray::dim() const { mx_uint ndim; - const mx_uint *pshape; + const dim_t *pshape; MX_CALL(MXNDArrayGetShape( ptr_->handle, &ndim, &pshape)); Rcpp::IntegerVector dat(pshape, pshape + ndim); @@ -190,7 +190,7 @@ Rcpp::Dimension NDArray::dim() const { } NDArray NDArray::Clone() const { - std::vector shape = Dim2InternalShape(this->dim()); + std::vector shape = Dim2InternalShape(this->dim()); Context ctx = this->ctx(); NDArrayHandle handle; MX_CALL(MXNDArrayCreate(dmlc::BeginPtr(shape), @@ -276,7 +276,7 @@ Rcpp::List NDArray::Load(const std::string& filename) { NDArray::RObjectType NDArray::Empty( const Rcpp::Dimension& rshape, const Context::RObjectType& rctx) { - std::vector shape = Dim2InternalShape(rshape); + std::vector shape = Dim2InternalShape(rshape); Context ctx(rctx); NDArrayHandle handle; MX_CALL(MXNDArrayCreate(dmlc::BeginPtr(shape), diff --git a/R-package/src/symbol.cc b/R-package/src/symbol.cc index 031c9a254019..c70bf794f39b 100644 --- a/R-package/src/symbol.cc +++ b/R-package/src/symbol.cc @@ -168,7 +168,7 @@ Symbol::RObjectType Symbol::GetOutput(mx_uint index) const { // helper function to convert shape into Rcpp vector inline Rcpp::List BuildShapeData(mx_uint shape_size, const mx_uint *shape_ndim, - const mx_uint **shape_data, + const dim_t **shape_data, const std::vector &names) { Rcpp::List ret(shape_size); for (mx_uint i = 0; i < shape_size; ++i) { @@ -185,12 +185,12 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const { << "Need to pass parameters in key=value style.\n"; std::vector keys = kwargs.names(); std::vector arg_ind_ptr(1, 0); - std::vector arg_shape_data; + std::vector arg_shape_data; for (size_t i = 0; i < kwargs.size(); ++i) { RCHECK(keys[i].length() != 0) << "Need to pass parameters in key=value style.\n"; - std::vector dim = Dim2InternalShape(kwargs[i]); + std::vector dim = Dim2InternalShape(kwargs[i]); arg_shape_data.insert(arg_shape_data.end(), dim.begin(), dim.end()); arg_ind_ptr.push_back(static_cast(arg_shape_data.size())); } @@ -198,13 +198,13 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const { mx_uint in_shape_size; const mx_uint *in_shape_ndim; - const mx_uint **in_shape_data; + const dim_t **in_shape_data; mx_uint out_shape_size; const mx_uint *out_shape_ndim; - const mx_uint **out_shape_data; + const dim_t **out_shape_data; mx_uint aux_shape_size; const mx_uint *aux_shape_ndim; - const mx_uint **aux_shape_data; + const dim_t **aux_shape_data; int complete; MX_CALL(MXSymbolInferShape( diff --git a/cpp-package/include/mxnet-cpp/base.h b/cpp-package/include/mxnet-cpp/base.h index d0f1bea15f00..e98f805d66ab 100644 --- a/cpp-package/include/mxnet-cpp/base.h +++ b/cpp-package/include/mxnet-cpp/base.h @@ -34,7 +34,7 @@ namespace mxnet { namespace cpp { -typedef unsigned index_t; +typedef int64_t index_t; enum OpReqType { /*! \brief no operation, do not write anything */ diff --git a/cpp-package/include/mxnet-cpp/initializer.h b/cpp-package/include/mxnet-cpp/initializer.h index 021808b38e34..df34928e6387 100644 --- a/cpp-package/include/mxnet-cpp/initializer.h +++ b/cpp-package/include/mxnet-cpp/initializer.h @@ -167,7 +167,7 @@ class Xavier : public Initializer { Shape shape(arr->GetShape()); float hw_scale = 1.0f; if (shape.ndim() > 2) { - for (size_t i = 2; i < shape.ndim(); ++i) { + for (index_t i = 2; i < shape.ndim(); ++i) { hw_scale *= shape[i]; } } diff --git a/cpp-package/include/mxnet-cpp/ndarray.h b/cpp-package/include/mxnet-cpp/ndarray.h index 6f37d91aa68e..ce1095f1cb49 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.h +++ b/cpp-package/include/mxnet-cpp/ndarray.h @@ -134,7 +134,7 @@ class NDArray { * \param constext context of NDArray * \param delay_alloc whether delay the allocation */ - NDArray(const std::vector &shape, const Context &context, + NDArray(const std::vector &shape, const Context &context, bool delay_alloc = true); /*! * \brief construct a new dynamic NDArray @@ -444,7 +444,7 @@ class NDArray { /*! * \return the shape of current NDArray, in the form of mx_uint vector */ - std::vector GetShape() const; + std::vector GetShape() const; /*! * \return the data type of current NDArray */ diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp index 966cf75c9122..75ca89a40485 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.hpp +++ b/cpp-package/include/mxnet-cpp/ndarray.hpp @@ -46,7 +46,7 @@ inline NDArray::NDArray() { inline NDArray::NDArray(const NDArrayHandle &handle) { blob_ptr_ = std::make_shared(handle); } -inline NDArray::NDArray(const std::vector &shape, const Context &context, +inline NDArray::NDArray(const std::vector &shape, const Context &context, bool delay_alloc) { NDArrayHandle handle; CHECK_EQ(MXNDArrayCreate(shape.data(), shape.size(), context.GetDeviceType(), @@ -396,11 +396,11 @@ inline size_t NDArray::Size() const { return ret; } -inline std::vector NDArray::GetShape() const { - const mx_uint *out_pdata; +inline std::vector NDArray::GetShape() const { + const index_t *out_pdata; mx_uint out_dim; MXNDArrayGetShape(blob_ptr_->handle_, &out_dim, &out_pdata); - std::vector ret; + std::vector ret; for (mx_uint i = 0; i < out_dim; ++i) { ret.push_back(out_pdata[i]); } diff --git a/cpp-package/include/mxnet-cpp/symbol.h b/cpp-package/include/mxnet-cpp/symbol.h index a25824cad602..80eec6376b48 100644 --- a/cpp-package/include/mxnet-cpp/symbol.h +++ b/cpp-package/include/mxnet-cpp/symbol.h @@ -161,10 +161,10 @@ class Symbol { * \param aux_shapes use to store the infered shapes of auxiliary states */ void InferShape( - const std::map > &arg_shapes, - std::vector > *in_shape, - std::vector > *aux_shape, - std::vector > *out_shape) const; + const std::map > &arg_shapes, + std::vector > *in_shape, + std::vector > *aux_shape, + std::vector > *out_shape) const; /*! * \brief List the arguments names. * diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp index b82e060ca8da..938d61883190 100644 --- a/cpp-package/include/mxnet-cpp/symbol.hpp +++ b/cpp-package/include/mxnet-cpp/symbol.hpp @@ -181,14 +181,14 @@ inline std::string Symbol::GetName() const { } inline void Symbol::InferShape( - const std::map > &arg_shapes, - std::vector > *in_shape, - std::vector > *aux_shape, - std::vector > *out_shape) const { + const std::map > &arg_shapes, + std::vector > *in_shape, + std::vector > *aux_shape, + std::vector > *out_shape) const { std::vector keys; std::vector arg_ind_ptr; - std::vector arg_shape_data; + std::vector arg_shape_data; for (const auto &arg : arg_shapes) { keys.push_back(arg.first.c_str()); @@ -201,13 +201,13 @@ inline void Symbol::InferShape( mx_uint in_shape_size; const mx_uint *in_shape_ndim; - const mx_uint **in_shape_data; + const index_t **in_shape_data; mx_uint out_shape_size; const mx_uint *out_shape_ndim; - const mx_uint **out_shape_data; + const index_t **out_shape_data; mx_uint aux_shape_size; const mx_uint *aux_shape_ndim; - const mx_uint **aux_shape_data; + const index_t **aux_shape_data; int complete; CHECK_EQ(MXSymbolInferShape(GetHandle(), keys.size(), keys.data(), @@ -220,19 +220,19 @@ inline void Symbol::InferShape( if (complete) { for (mx_uint i = 0; i < in_shape_size; ++i) { - in_shape->push_back(std::vector()); + in_shape->push_back(std::vector()); for (mx_uint j = 0; j < in_shape_ndim[i]; ++j) { (*in_shape)[i].push_back(in_shape_data[i][j]); } } for (mx_uint i = 0; i < aux_shape_size; ++i) { - aux_shape->push_back(std::vector()); + aux_shape->push_back(std::vector()); for (mx_uint j = 0; j < aux_shape_ndim[i]; ++j) { (*aux_shape)[i].push_back(aux_shape_data[i][j]); } } for (mx_uint i = 0; i < out_shape_size; ++i) { - out_shape->push_back(std::vector()); + out_shape->push_back(std::vector()); for (mx_uint j = 0; j < out_shape_ndim[i]; ++j) { (*out_shape)[i].push_back(out_shape_data[i][j]); } @@ -250,8 +250,8 @@ inline void Symbol::InferExecutorArrays( const std::map &aux_map) const { const auto arg_name_list = ListArguments(); - std::vector > in_shapes, aux_shapes, out_shapes; - std::map > arg_shapes; + std::vector > in_shapes, aux_shapes, out_shapes; + std::map > arg_shapes; for (const auto &arg_name : arg_name_list) { auto iter = args_map.find(arg_name); @@ -307,8 +307,8 @@ inline void Symbol::InferArgsMap( const std::map &known_args) const { const auto arg_name_list = ListArguments(); - std::vector > in_shapes, aux_shapes, out_shapes; - std::map > arg_shapes; + std::vector > in_shapes, aux_shapes, out_shapes; + std::map > arg_shapes; for (const auto &arg_name : arg_name_list) { auto iter = known_args.find(arg_name); diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index e9f1e2d6cccc..93bf27ad5d70 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -487,7 +487,7 @@ MXNET_DLL int MXNDArrayCreateNone(NDArrayHandle *out); * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayCreate(const mx_uint *shape, +MXNET_DLL int MXNDArrayCreate(const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -506,7 +506,7 @@ MXNET_DLL int MXNDArrayCreate(const mx_uint *shape, * \param out the returning handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXNDArrayCreateEx(const mx_uint *shape, +MXNET_DLL int MXNDArrayCreateEx(const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -533,7 +533,7 @@ MXNET_DLL int MXNDArrayCreateEx(const mx_uint *shape, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayCreateSparseEx(int storage_type, - const mx_uint *shape, + const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -542,7 +542,7 @@ MXNET_DLL int MXNDArrayCreateSparseEx(int storage_type, mx_uint num_aux, int *aux_type, mx_uint *aux_ndims, - const mx_uint *aux_shape, + const dim_t *aux_shape, NDArrayHandle *out); /*! @@ -650,7 +650,7 @@ MXNET_DLL int MXNDArraySyncCopyToCPU(NDArrayHandle handle, */ MXNET_DLL int MXNDArraySyncCopyFromNDArray(NDArrayHandle handle_dst, const NDArrayHandle handle_src, - const int i); + const dim_t i); /*! * \brief check whether the NDArray format is valid @@ -693,8 +693,8 @@ MXNET_DLL int MXNDArrayFree(NDArrayHandle handle); * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArraySlice(NDArrayHandle handle, - mx_uint slice_begin, - mx_uint slice_end, + dim_t slice_begin, + dim_t slice_end, NDArrayHandle *out); /*! @@ -705,7 +705,7 @@ MXNET_DLL int MXNDArraySlice(NDArrayHandle handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayAt(NDArrayHandle handle, - mx_uint idx, + dim_t idx, NDArrayHandle *out); /*! @@ -749,7 +749,7 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, */ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, mx_uint *out_dim, - const mx_uint **out_pdata); + const dim_t **out_pdata); /*! * \brief get the content of the data in NDArray * \param handle the handle to the ndarray @@ -1466,16 +1466,16 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const dim_t *arg_shape_data, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *complete); /*! * \brief partially infer shape of unknown input shapes given the known one. @@ -1505,16 +1505,16 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const dim_t *arg_shape_data, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *complete); /*! diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index 4ba13ca6498a..4ba0e310461a 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -488,7 +488,7 @@ class NDArray { /*! * \brief Copy from src.data()/aux_data(i) to this->data()/aux_data(j) */ - void SyncCopyFromNDArray(const NDArray &src, int i = -1, int j = -1); + void SyncCopyFromNDArray(const NDArray &src, index_t i = -1, index_t j = -1); /*! * \brief Do a synchronize copy to a continugous CPU memory region. diff --git a/julia/src/base.jl b/julia/src/base.jl index ce1c183eafb5..0524d1ebde1e 100644 --- a/julia/src/base.jl +++ b/julia/src/base.jl @@ -26,6 +26,7 @@ Base.show(io::IO, e::MXError) = print(io, e.msg) # Common types used in MXNet API ################################################################################ const MX_uint = Cuint +const MX_long = Clonglong const MX_float = Cfloat const MX_handle = Ptr{Void} diff --git a/julia/src/ndarray.jl b/julia/src/ndarray.jl index 9e47150a1a00..d79d4c1b08c3 100644 --- a/julia/src/ndarray.jl +++ b/julia/src/ndarray.jl @@ -245,8 +245,8 @@ See also the notes on NDArray shapes [`NDArray`](@ref). """ function size(x::NDArray) ref_ndim = Ref{MX_uint}(0) - ref_shape = Ref{Ptr{MX_uint}}(0) - @mxcall(:MXNDArrayGetShape, (MX_handle, Ref{MX_uint}, Ref{Ptr{MX_uint}}), + ref_shape = Ref{Ptr{MX_long}}(0) + @mxcall(:MXNDArrayGetShape, (MX_handle, Ref{MX_uint}, Ref{Ptr{MX_long}}), x, ref_ndim, ref_shape) tuple(map(Int, flipdim(unsafe_wrap(Array, ref_shape[], ref_ndim[]),1))...) end @@ -278,8 +278,8 @@ ndims(x::NDArray) = ndims(x.handle) function ndims(x::MX_NDArrayHandle)::Int ref_ndim = Ref{MX_uint}(0) - ref_shape = Ref{Ptr{MX_uint}}(0) - @mxcall(:MXNDArrayGetShape, (MX_handle, Ref{MX_uint}, Ref{Ptr{MX_uint}}), + ref_shape = Ref{Ptr{MX_long}}(0) + @mxcall(:MXNDArrayGetShape, (MX_handle, Ref{MX_uint}, Ref{Ptr{MX_long}}), x, ref_ndim, ref_shape) ref_ndim[] end diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i index b1907f5cd7ec..c900598700f7 100644 --- a/perl-package/AI-MXNetCAPI/mxnet.i +++ b/perl-package/AI-MXNetCAPI/mxnet.i @@ -384,7 +384,7 @@ int MXNDArrayCreateNone(NDArrayHandle *out); * \param out the returning handle * \return 0 when success, -1 when failure happens */ -int MXNDArrayCreate(const mx_uint *in, +int MXNDArrayCreate(const dim_t *in, mx_uint ndim, int dev_type, int dev_id, @@ -403,7 +403,7 @@ int MXNDArrayCreate(const mx_uint *in, * \param out the returning handle * \return 0 when success, -1 when failure happens */ -int MXNDArrayCreateEx(const mx_uint *in, +int MXNDArrayCreateEx(const dim_t *in, mx_uint ndim, int dev_type, int dev_id, @@ -428,7 +428,7 @@ int MXNDArrayCreateEx(const mx_uint *in, * \return 0 when success, -1 when failure happens */ int MXNDArrayCreateSparseEx(int storage_type, - const mx_uint *in, + const dim_t *in, mx_uint ndim, int dev_type, int dev_id, @@ -437,7 +437,7 @@ int MXNDArrayCreateSparseEx(int storage_type, mx_uint num_aux, int *in, mx_uint *in, - const mx_uint *in, + const dim_t *in, NDArrayHandle *out); @@ -589,8 +589,8 @@ int MXNDArrayFree(NDArrayHandle handle); * \return 0 when success, -1 when failure happens */ int MXNDArraySlice(NDArrayHandle handle, - mx_uint slice_begin, - mx_uint slice_end, + dim_t slice_begin, + dim_t slice_end, NDArrayHandle *out); /*! * \brief Index the NDArray along axis 0. @@ -600,7 +600,7 @@ int MXNDArraySlice(NDArrayHandle handle, * \return 0 when success, -1 when failure happens */ int MXNDArrayAt(NDArrayHandle handle, - mx_uint idx, + dim_t idx, NDArrayHandle *out); /*! * \brief get the storage type of the array @@ -642,7 +642,7 @@ int MXNDArrayReshape64(NDArrayHandle handle, */ int MXNDArrayGetShape(NDArrayHandle handle, mx_uint *out_dim, - const mx_uint **out_pdata); + const dim_t **out_pdata); /*! * \brief get the content of the data in NDArray * \param handle the handle to the ndarray @@ -1293,16 +1293,16 @@ int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** in, const mx_uint *in, - const mx_uint *in, + const dim_t *in, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *out); /*! * \brief partially infer shape of unknown input shapes given the known one. @@ -1332,16 +1332,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** in, const mx_uint *in, - const mx_uint *in, + const dim_t *in, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *out); /*! diff --git a/python/mxnet/base.py b/python/mxnet/base.py index feb4d70b6533..3582adc62002 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -215,6 +215,10 @@ def _load_lib(): # type definitions mx_uint = ctypes.c_uint mx_float = ctypes.c_float +if sys.version_info.major > 2: + mx_long = ctypes.c_longlong +else: + mx_long = ctypes.c_long mx_float_p = ctypes.POINTER(mx_float) mx_real_t = np.float32 NDArrayHandle = ctypes.c_void_p diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 78ec0b91f88d..3f536c99eea4 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -29,13 +29,14 @@ from array import array as native_array import ctypes +import sys import warnings import operator from functools import reduce # pylint: disable=redefined-builtin import numpy as np from ..base import _LIB, numeric_types, integer_types from ..base import c_str, c_array, c_array_buf, c_handle_array, mx_real_t -from ..base import mx_uint, NDArrayHandle, check_call, DLPackHandle +from ..base import mx_uint, mx_long, NDArrayHandle, check_call, DLPackHandle from ..base import ctypes2buffer from ..context import Context, current_context from . import _internal @@ -47,7 +48,7 @@ "imdecode", "lesser", "lesser_equal", "logical_and", "logical_or", "logical_xor", "maximum", "minimum", "moveaxis", "modulo", "multiply", "not_equal", "onehot_encode", "power", "subtract", "true_divide", "waitall", "_new_empty_handle", "histogram", - "to_dlpack_for_read", "to_dlpack_for_write", "from_dlpack"] + "to_dlpack_for_read", "to_dlpack_for_write", "from_dlpack", "get_array_typecode"] _STORAGE_TYPE_UNDEFINED = -1 _STORAGE_TYPE_DEFAULT = 0 @@ -131,7 +132,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): """ hdl = NDArrayHandle() check_call(_LIB.MXNDArrayCreateEx( - c_array_buf(mx_uint, native_array('I', shape)), + c_array_buf(mx_long, native_array(get_array_typecode(), shape)), mx_uint(len(shape)), ctypes.c_int(ctx.device_typeid), ctypes.c_int(ctx.device_id), @@ -902,7 +903,7 @@ def _slice(self, start, stop): start, stop, _ = _get_index_range(start, stop, self.shape[0]) check_call(_LIB.MXNDArraySlice( - self.handle, mx_uint(start), mx_uint(stop), ctypes.byref(handle))) + self.handle, mx_long(start), mx_long(stop), ctypes.byref(handle))) return NDArray(handle=handle, writable=self.writable) def _at(self, idx): @@ -936,7 +937,7 @@ def _at(self, idx): raise IndexError('index %d is out of bounds for axis 0 with size %d' % (idx-length, length)) check_call(_LIB.MXNDArrayAt( - self.handle, mx_uint(idx), ctypes.byref(handle))) + self.handle, mx_long(idx), ctypes.byref(handle))) return NDArray(handle=handle, writable=self.writable) def reshape(self, *shape, **kwargs): @@ -1834,7 +1835,7 @@ def shape(self): (2L, 3L, 4L) """ ndim = mx_uint() - pdata = ctypes.POINTER(mx_uint)() + pdata = ctypes.POINTER(mx_long)() check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index @@ -4033,3 +4034,10 @@ def from_dlpack(dlpack): # delete the deleter of the old dlpack ctypes.pythonapi.PyCapsule_SetDestructor(dlpack, None) return NDArray(handle=handle) + + +def get_array_typecode(): + if sys.version_info.major > 2: + return 'q' + else: + return 'l' diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py index 928079749db5..5c7d4545fe34 100644 --- a/python/mxnet/ndarray/sparse.py +++ b/python/mxnet/ndarray/sparse.py @@ -41,7 +41,7 @@ from ..base import NotSupportedForSparseNDArray from ..base import _LIB, numeric_types from ..base import c_array_buf, mx_real_t, integer_types -from ..base import mx_uint, NDArrayHandle, check_call +from ..base import mx_uint, mx_long, NDArrayHandle, check_call from ..context import Context, current_context from . import _internal from . import op @@ -56,6 +56,7 @@ from .ndarray import zeros as _zeros_ndarray from .ndarray import array as _array from .ndarray import _ufunc_helper +from .ndarray import get_array_typecode try: @@ -90,7 +91,7 @@ def _new_alloc_handle(stype, shape, ctx, delay_alloc, dtype, aux_types, aux_shap num_aux = mx_uint(len(aux_types)) check_call(_LIB.MXNDArrayCreateSparseEx( ctypes.c_int(int(_STORAGE_TYPE_STR_TO_ID[stype])), - c_array_buf(mx_uint, native_array('I', shape)), + c_array_buf(mx_long, native_array(get_array_typecode(), shape)), mx_uint(len(shape)), ctypes.c_int(ctx.device_typeid), ctypes.c_int(ctx.device_id), @@ -99,7 +100,7 @@ def _new_alloc_handle(stype, shape, ctx, delay_alloc, dtype, aux_types, aux_shap num_aux, c_array_buf(ctypes.c_int, native_array('i', aux_type_ids)), c_array_buf(mx_uint, native_array('I', aux_shape_lens)), - c_array_buf(mx_uint, native_array('I', aux_shapes)), + c_array_buf(mx_long, native_array(get_array_typecode(), aux_shapes)), ctypes.byref(hdl))) return hdl @@ -1011,9 +1012,9 @@ def _csr_matrix_from_definition(data, indices, indptr, shape=None, ctx=None, raise ValueError('invalid shape') result = CSRNDArray(_new_alloc_handle(storage_type, shape, ctx, False, dtype, [indptr_type, indices_type], aux_shapes)) - check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, data.handle, ctypes.c_int(-1))) - check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indptr.handle, ctypes.c_int(0))) - check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indices.handle, ctypes.c_int(1))) + check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, data.handle, ctypes.c_longlong(-1))) + check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indptr.handle, ctypes.c_longlong(0))) + check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indices.handle, ctypes.c_longlong(1))) return result # pylint: enable= no-member, protected-access diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index 530d72796c00..abe5ae389d71 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -34,7 +34,7 @@ from ..attribute import AttrScope from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array -from ..base import mx_uint, py_str, string_types, integer_types +from ..base import mx_uint, mx_long, py_str, string_types, integer_types from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle from ..base import check_call, MXNetError, NotImplementedForSymbol from ..context import Context, current_context @@ -45,6 +45,8 @@ from . import _internal from . import op from ._internal import SymbolBase, _set_symbol_class +from ..ndarray.ndarray import get_array_typecode + __all__ = ["Symbol", "var", "Variable", "Group", "load", "load_json", "pow", "maximum", "minimum", "hypot", "eye", "zeros", "ones", "full", "arange", @@ -1096,13 +1098,13 @@ def _infer_shape_impl(self, partial, *args, **kwargs): keys = c_str_array(str_keys) arg_shape_size = mx_uint() arg_shape_ndim = ctypes.POINTER(mx_uint)() - arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_long))() out_shape_size = mx_uint() out_shape_ndim = ctypes.POINTER(mx_uint)() - out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_long))() aux_shape_size = mx_uint() aux_shape_ndim = ctypes.POINTER(mx_uint)() - aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_long))() complete = ctypes.c_int() if partial: infer_func = _LIB.MXSymbolInferShapePartial @@ -1113,7 +1115,7 @@ def _infer_shape_impl(self, partial, *args, **kwargs): mx_uint(len(indptr) - 1), keys, c_array_buf(mx_uint, array('I', indptr)), - c_array_buf(mx_uint, array('I', sdata)), + c_array_buf(mx_long, array(get_array_typecode(), sdata)), ctypes.byref(arg_shape_size), ctypes.byref(arg_shape_ndim), ctypes.byref(arg_shape_data), diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc index 17d166eac345..bbd161d6aa5b 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc @@ -83,10 +83,18 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayCreateNone JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayCreateEx (JNIEnv *env, jobject obj, jintArray shape, jint ndim, jint devType, jint devId, jint delayAlloc, jint dtype, jobject ndArrayHandle) { + // TODO(andrewfayres): this is a workaround to get scala unit test pass + // need to update scala APIs to support large array + const size_t length = env->GetArrayLength(shape); jint *shapeArr = env->GetIntArrayElements(shape, NULL); + jlong *tmpShapeArr = new jlong[length]; + for (size_t i = 0; i < length; ++i) { + tmpShapeArr[i] = shapeArr[i]; + } NDArrayHandle out; - int ret = MXNDArrayCreateEx(reinterpret_cast(shapeArr), static_cast(ndim), + int ret = MXNDArrayCreateEx(reinterpret_cast(tmpShapeArr), static_cast(ndim), devType, devId, delayAlloc, dtype, &out); + delete[] tmpShapeArr; env->ReleaseIntArrayElements(shape, shapeArr, 0); SetLongField(env, ndArrayHandle, reinterpret_cast(out)); return ret; @@ -354,7 +362,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayLoadFromRawBytes JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape (JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject ndimRef, jobject dataBuf) { mx_uint ndim; - const mx_uint *pdata; + const dim_t *pdata; int ret = MXNDArrayGetShape(reinterpret_cast(ndArrayPtr), &ndim, &pdata); // fill dataBuf @@ -365,7 +373,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape jmethodID arrayAppend = env->GetMethodID(arrayClass, "$plus$eq", "(Ljava/lang/Object;)Lscala/collection/mutable/ArrayBuffer;"); for (size_t i = 0; i < ndim; ++i) { - jobject data = env->NewObject(integerClass, newInteger, pdata[i]); + jobject data = env->NewObject(integerClass, newInteger, static_cast(pdata[i])); env->CallObjectMethod(dataBuf, arrayAppend, data); env->DeleteLocalRef(data); } @@ -381,8 +389,10 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArraySyncCopyToCPU (JNIEnv *env, jobject obj, jlong ndArrayPtr, jbyteArray data, jint size) { jbyte *pdata = env->GetByteArrayElements(data, NULL); + // TODO(andrewfayres): this is a workaround to get scala unit test pass + // need to update scala APIs to support large array int ret = MXNDArraySyncCopyToCPU(reinterpret_cast(ndArrayPtr), - reinterpret_cast(pdata), size); + reinterpret_cast(pdata), static_cast(size)); env->ReleaseByteArrayElements(data, pdata, 0); // copy back to java array automatically return ret; } @@ -417,8 +427,11 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayReshape JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArraySyncCopyFromCPU (JNIEnv *env, jobject obj, jlong arrayPtr, jfloatArray sourceArr, jint arrSize) { jfloat *sourcePtr = env->GetFloatArrayElements(sourceArr, NULL); + // TODO(andrewfayres): this is a workaround to get scala unit test pass + // need to update scala APIs to support large array int ret = MXNDArraySyncCopyFromCPU(reinterpret_cast(arrayPtr), - static_cast(sourcePtr), arrSize); + static_cast(sourcePtr), + static_cast(arrSize)); env->ReleaseFloatArrayElements(sourceArr, sourcePtr, 0); return ret; } @@ -1519,7 +1532,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolCreateFromFile int FillSymbolInferShape (JNIEnv *env, jmethodID listAppend, jobject joutData, - mx_uint shapeSize, const mx_uint *shapeNdim, const mx_uint **shapeData) { + mx_uint shapeSize, const mx_uint *shapeNdim, const dim_t **shapeData) { for (size_t i = 0; i < shapeSize; ++i) { jintArray jshape = env->NewIntArray(shapeNdim[i]); if (jshape == NULL) { @@ -1549,25 +1562,32 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape mx_uint inShapeSize; const mx_uint *inShapeNdim; - const mx_uint **inShapeData; + const dim_t **inShapeData; mx_uint outShapeSize; const mx_uint *outShapeNdim; - const mx_uint **outShapeData; + const dim_t **outShapeData; mx_uint auxShapeSize; const mx_uint *auxShapeNdim; - const mx_uint **auxShapeData; + const dim_t **auxShapeData; int complete; jint *argIndPtr = env->GetIntArrayElements(jargIndPtr, NULL); jint *argShapeData = env->GetIntArrayElements(jargShapeData, NULL); + // TODO(andrewfayres): this is a workaround to get scala unit test pass + // need to update scala APIs to support large array + const size_t argShapeLength = env->GetArrayLength(jargShapeData); + jlong *argShapeDataTmp = new jlong[argShapeLength]; + for (size_t i = 0; i < argShapeLength; ++i) { + argShapeDataTmp[i] = argShapeData[i]; + } int ret = MXSymbolInferShape(reinterpret_cast(symbolPtr), static_cast(jnumArgs), keys, reinterpret_cast(argIndPtr), - reinterpret_cast(argShapeData), + reinterpret_cast(argShapeDataTmp), &inShapeSize, &inShapeNdim, &inShapeData, @@ -1578,6 +1598,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape &auxShapeNdim, &auxShapeData, &complete); + delete[] argShapeDataTmp; env->ReleaseIntArrayElements(jargShapeData, argShapeData, 0); env->ReleaseIntArrayElements(jargIndPtr, argIndPtr, 0); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 80bd60538ff5..5ed6d085bb13 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -151,7 +151,7 @@ int MXNDArrayCreateNone(NDArrayHandle *out) { API_END(); } -int MXNDArrayCreate(const mx_uint *shape, +int MXNDArrayCreate(const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -165,7 +165,7 @@ int MXNDArrayCreate(const mx_uint *shape, API_END(); } -int MXNDArrayCreateEx(const mx_uint *shape, +int MXNDArrayCreateEx(const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -182,7 +182,7 @@ int MXNDArrayCreateEx(const mx_uint *shape, } int MXNDArrayCreateSparseEx(int storage_type, - const mx_uint *shape, + const dim_t *shape, mx_uint ndim, int dev_type, int dev_id, @@ -191,7 +191,7 @@ int MXNDArrayCreateSparseEx(int storage_type, mx_uint num_aux, int *aux_type, mx_uint *aux_ndims, - const mx_uint *aux_shape, + const dim_t *aux_shape, NDArrayHandle *out) { API_BEGIN(); std::vector aux_types; @@ -266,7 +266,7 @@ int MXNDArraySyncCopyToCPU(NDArrayHandle handle, */ int MXNDArraySyncCopyFromNDArray(NDArrayHandle handle_dst, const NDArrayHandle handle_src, - const int i) { + const dim_t i) { API_BEGIN(); NDArray* dst = static_cast(handle_dst); NDArray* src = static_cast(handle_src); @@ -394,8 +394,8 @@ int MXNDArrayFree(NDArrayHandle handle) { } int MXNDArraySlice(NDArrayHandle handle, - mx_uint slice_begin, - mx_uint slice_end, + dim_t slice_begin, + dim_t slice_end, NDArrayHandle *out) { NDArray *ptr = new NDArray(); API_BEGIN(); @@ -406,7 +406,7 @@ int MXNDArraySlice(NDArrayHandle handle, } int MXNDArrayAt(NDArrayHandle handle, - mx_uint idx, + dim_t idx, NDArrayHandle *out) { NDArray *ptr = new NDArray(); API_BEGIN(); @@ -482,14 +482,14 @@ int MXNDArrayGetStorageType(NDArrayHandle handle, int MXNDArrayGetShape(NDArrayHandle handle, mx_uint *out_dim, - const mx_uint **out_pdata) { + const dim_t **out_pdata) { MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); NDArray *arr = static_cast(handle); if (!arr->is_none()) { const TShape &s = arr->shape(); *out_dim = s.ndim(); - std::vector& buffer = ret->arg_shape_buffer; + std::vector& buffer = ret->arg_shape_buffer; buffer.resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data()); *out_pdata = buffer.data(); diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 079b587e9965..12e823cf3183 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -84,23 +84,23 @@ struct MXAPIThreadLocalEntry { /*! \brief result holder for returning shape dimensions */ std::vector arg_shape_ndim, out_shape_ndim, aux_shape_ndim; /*! \brief result holder for returning shape pointer */ - std::vector arg_shape_data, out_shape_data, aux_shape_data; + std::vector arg_shape_data, out_shape_data, aux_shape_data; /*! \brief uint32_t buffer for returning shape pointer */ - std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; + std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; /*! \brief bool buffer */ std::vector save_inputs, save_outputs; // helper function to setup return value of shape array inline static void SetupShapeArrayReturnWithBuffer( const std::vector &shapes, std::vector *ndim, - std::vector *data, - std::vector *buffer) { + std::vector *data, + std::vector *buffer) { ndim->resize(shapes.size()); data->resize(shapes.size()); size_t size = 0; for (const auto& s : shapes) size += s.ndim(); buffer->resize(size); - uint32_t *ptr = buffer->data(); + dim_t *ptr = buffer->data(); for (size_t i = 0; i < shapes.size(); ++i) { ndim->at(i) = shapes[i].ndim(); data->at(i) = ptr; diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 73a8a7ca6f86..c90a4524ae5f 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -505,16 +505,16 @@ int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const dim_t *arg_shape_data, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *complete) { nnvm::Symbol *s = static_cast(sym); MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); @@ -572,16 +572,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const dim_t *arg_shape_data, mx_uint *in_shape_size, const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const dim_t ***in_shape_data, mx_uint *out_shape_size, const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const dim_t ***out_shape_data, mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const dim_t ***aux_shape_data, int *complete) { int succ; *complete = 1; diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 081d4e759323..02d0070b117c 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -1849,7 +1849,7 @@ void NDArray::SyncCopyFromCPU(const void *data, size_t size) const { /*! * \brief Copy src.data()/aux_data(i) to dst->data()/aux_data(j). */ -void NDArray::SyncCopyFromNDArray(const NDArray& src, int i, int j) { +void NDArray::SyncCopyFromNDArray(const NDArray& src, index_t i, index_t j) { if (i >= 0) { CHECK_NE(src.storage_type(), kDefaultStorage); } else { diff --git a/src/operator/elemwise_op_common.h b/src/operator/elemwise_op_common.h index cf44da699156..4b8663bba6ea 100644 --- a/src/operator/elemwise_op_common.h +++ b/src/operator/elemwise_op_common.h @@ -100,7 +100,7 @@ inline bool ElemwiseStorageAttr(const nnvm::NodeAttrs& attrs, * \tparam rsp whether row sparse stype is supported * \tparam rsp whether csr stype is supported */ -template +template inline bool ElemwiseStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask, DispatchMode* dispatch_mode, @@ -115,7 +115,7 @@ inline bool ElemwiseStorageType(const nnvm::NodeAttrs& attrs, template + index_t n_in = -1, index_t n_out = -1> inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs, @@ -154,7 +154,7 @@ inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs, return true; } -template +template inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { @@ -168,7 +168,7 @@ inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs, attrs, in_attrs, out_attrs, TShape()); } -template +template inline bool ElemwiseType(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index 5b106afd8d5b..6cab1990858b 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -289,8 +289,8 @@ inline int get_num_threads(const int N) { /* \brief Compute flattened index given coordinates and shape. */ template -MSHADOW_XINLINE int ravel(const Shape& coord, const Shape& shape) { - int ret = 0; +MSHADOW_XINLINE index_t ravel(const Shape& coord, const Shape& shape) { + index_t ret = 0; #pragma unroll for (int i = 0; i < ndim; ++i) { ret = ret * shape[i] + (shape[i] > coord[i]) * coord[i]; @@ -301,11 +301,11 @@ MSHADOW_XINLINE int ravel(const Shape& coord, const Shape& shape) { /* Compute coordinates from flattened index given shape */ template -MSHADOW_XINLINE Shape unravel(const int idx, const Shape& shape) { +MSHADOW_XINLINE Shape unravel(const index_t idx, const Shape& shape) { Shape ret; #pragma unroll - for (int i = ndim-1, j = idx; i >=0; --i) { - int tmp = j / shape[i]; + for (index_t i = ndim-1, j = idx; i >=0; --i) { + auto tmp = j / shape[i]; ret[i] = j - tmp*shape[i]; j = tmp; } @@ -315,8 +315,8 @@ MSHADOW_XINLINE Shape unravel(const int idx, const Shape& shape) { /* Compute dot product of two vector */ template -MSHADOW_XINLINE int dot(const Shape& coord, const Shape& stride) { - int ret = 0; +MSHADOW_XINLINE index_t dot(const Shape& coord, const Shape& stride) { + index_t ret = 0; #pragma unroll for (int i = 0; i < ndim; ++i) { ret += coord[i] * stride[i]; @@ -327,12 +327,12 @@ MSHADOW_XINLINE int dot(const Shape& coord, const Shape& stride) { /* Combining unravel and dot */ template -MSHADOW_XINLINE int unravel_dot(const int idx, const Shape& shape, +MSHADOW_XINLINE index_t unravel_dot(const index_t idx, const Shape& shape, const Shape& stride) { - int ret = 0; + index_t ret = 0; #pragma unroll - for (int i = ndim-1, j = idx; i >=0; --i) { - int tmp = j / shape[i]; + for (index_t i = ndim-1, j = idx; i >=0; --i) { + auto tmp = j / shape[i]; ret += (j - tmp*shape[i])*stride[i]; j = tmp; } @@ -433,51 +433,51 @@ struct op_with_req { /*! \brief input is one tensor */ template - MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in) { + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *in) { KERNEL_ASSIGN(out[i], req, OP::Map(in[i])); } /*! \brief inputs are two tensors */ template - MSHADOW_XINLINE static void Map(int i, DType *out, const DType *lhs, const DType *rhs) { + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *lhs, const DType *rhs) { KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], rhs[i])); } /*! \brief input is tensor and a scalar value */ template - MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in, const DType value) { + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *in, const DType value) { KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value)); } /*! \brief input is tensor and two scalar value */ template - MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in, + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *in, const DType value_1, const DType value_2) { KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value_1, value_2)); } /*! \brief No inputs (ie fill to constant value) */ template - MSHADOW_XINLINE static void Map(int i, DType *out) { + MSHADOW_XINLINE static void Map(index_t i, DType *out) { KERNEL_ASSIGN(out[i], req, OP::Map()); } /*! \brief input is single scalar value */ template - MSHADOW_XINLINE static void Map(int i, DType *out, const DType value) { + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType value) { KERNEL_ASSIGN(out[i], req, OP::Map(value)); } /*! \brief inputs are two tensors and a scalar value */ template - MSHADOW_XINLINE static void Map(int i, DType *out, + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *input_1, const DType *input_2, const DType value) { KERNEL_ASSIGN(out[i], req, OP::Map(input_1[i], input_2[i], value)); } /*! \brief inputs are three tensors (ie backward grad with binary grad function) */ template - MSHADOW_XINLINE static void Map(int i, DType *out, + MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *input_1, const DType *input_2, const DType *input_3) { @@ -503,21 +503,21 @@ struct Kernel { * \param args Varargs to eventually pass to the OP::Map() function */ template - inline static bool Launch(mshadow::Stream *, const int N, Args... args) { + inline static bool Launch(mshadow::Stream *, const size_t N, Args... args) { #ifdef _OPENMP const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); if (omp_threads < 2) { - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { OP::Map(i, args...); } } else { #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < N; ++i) { + for (index_t i = 0; i < static_cast(N); ++i) { OP::Map(i, args...); } } #else - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { OP::Map(i, args...); } #endif @@ -567,22 +567,22 @@ struct Kernel { * \param args Varargs to eventually pass to the OP::Map() function */ template - static void LaunchTuned(mshadow::Stream *, const int N, Args... args) { + static void LaunchTuned(mshadow::Stream *, const size_t N, Args... args) { #ifdef _OPENMP const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); if (omp_threads < 2 || !tuned_op::UseOMP( - static_cast(N), static_cast(omp_threads))) { - for (int i = 0; i < N; ++i) { + N, static_cast(omp_threads))) { + for (size_t i = 0; i < N; ++i) { OP::Map(i, args...); } } else { #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < N; ++i) { + for (index_t i = 0; i < static_cast(N); ++i) { OP::Map(i, args...); } } #else - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { OP::Map(i, args...); } #endif @@ -596,15 +596,15 @@ struct Kernel { * \param args Varargs to eventually pass to the UseOMP() and OP::Map() functions */ template - inline static void LaunchEx(mshadow::Stream *s, const int N, Args... args) { + inline static void LaunchEx(mshadow::Stream *s, const size_t N, Args... args) { #ifdef _OPENMP const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); if (omp_threads < 2) { OP::Map(0, N, args...); } else { - const int length = (N + omp_threads - 1) / omp_threads; + const auto length = (N + omp_threads - 1) / omp_threads; #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < N; i += length) { + for (index_t i = 0; i < static_cast(N); i += length) { OP::Map(i, i + length > N ? N - i : length, args...); } } @@ -626,7 +626,7 @@ struct Kernel { template static MSHADOW_CINLINE typename std::enable_if::value, bool>::type - Launch(mshadow::Stream *s, const int N, DType *dest, Args... args) { + Launch(mshadow::Stream *s, const size_t N, DType *dest, Args... args) { LaunchTuned(s, N, dest, args...); return true; } @@ -644,7 +644,7 @@ struct Kernel { template static MSHADOW_CINLINE typename std::enable_if::value, bool>::type - Launch(mshadow::Stream *s, const int N, DType *dest, Args... args) { + Launch(mshadow::Stream *s, const size_t N, DType *dest, Args... args) { LaunchTuned(s, N, dest, args...); return true; } @@ -700,7 +700,7 @@ template struct set_to_int : public tunable { // mxnet_op version (when used directly with Kernel<>::Launch()) */ template - MSHADOW_XINLINE static void Map(int i, DType *out) { + MSHADOW_XINLINE static void Map(index_t i, DType *out) { out[i] = DType(val); } // mshadow_op version (when used with op_with_req<>) diff --git a/src/operator/random/sampler.h b/src/operator/random/sampler.h index 44f80ab56254..de84a58323c6 100644 --- a/src/operator/random/sampler.h +++ b/src/operator/random/sampler.h @@ -43,32 +43,33 @@ namespace op { template inline static void LaunchRNG(mshadow::Stream *s, common::random::RandGenerator *gen, - const int N, Args... args) { + const index_t N, Args... args) { // minimal check to avoid division by zero, below. // if `N` is zero the map operation is a no-op in any case. if (N <= 0) { return; } - const int nloop = (N + RandGenerator::kMinNumRandomPerThread - 1) / + const index_t nloop = (N + RandGenerator::kMinNumRandomPerThread - 1) / RandGenerator::kMinNumRandomPerThread; - const int nthread = std::min(nloop, RandGenerator::kNumRandomStates); - const int step = (N + nthread - 1) / nthread; + const index_t nthread = std::min(nloop, + static_cast(RandGenerator::kNumRandomStates)); + const index_t step = (N + nthread - 1) / nthread; Kernel::Launch(s, nthread, *gen, N, step, args...); } #define RNG_KERNEL_LOOP(xpu, GType, thread_id, gen, N, step, ...) \ - const int start = thread_id * step; \ - const int end = start + step; \ + const index_t start = thread_id * step; \ + const index_t end = start + step; \ typename RandGenerator::Impl genImpl(&gen, thread_id); \ - for (int i = start; i < end && i < N; ++i) { \ + for (index_t i = start; i < end && i < N; ++i) { \ {__VA_ARGS__} \ } template struct SampleUniformKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *lower, const IType *upper, OType *out) { RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { @@ -95,8 +96,8 @@ struct UniformSampler { template struct SampleNormalKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *mean, const IType *std, OType *out) { RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { @@ -122,8 +123,8 @@ struct NormalSampler { template struct SampleExponentialKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *lambda, OType *out) { RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { @@ -170,8 +171,8 @@ MSHADOW_XINLINE OType SampleGamma(IType a, IType b, typename RandGenerator struct SampleGammaKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *alpha, const IType *beta, OType *out) { RNG_KERNEL_LOOP(xpu, FType, id, gen, N, step, { @@ -232,8 +233,8 @@ MSHADOW_XINLINE int SamplePoisson(float lambda, typename RandGenerator struct SamplePoissonKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *lambda, OType *out) { RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { @@ -259,8 +260,8 @@ struct PoissonSampler { template struct SampleNegativeBinomialKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *k, const IType *p, OType *out) { RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { @@ -291,8 +292,8 @@ struct NegativeBinomialSampler { template struct SampleGeneralizedNegativeBinomialKernel { template - MSHADOW_XINLINE static void Map(int id, RandGenerator gen, - const int N, const int step, + MSHADOW_XINLINE static void Map(index_t id, RandGenerator gen, + const index_t N, const index_t step, index_t nParm, index_t nSample, const IType *mu, const IType *alpha, OType *out) { RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 167fa34b083f..141d2fb83d0d 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -53,14 +53,14 @@ MSHADOW_XINLINE Shape calc_stride(const Shape& shape) { } template -MSHADOW_XINLINE void unravel_dot(const int idx, const Shape& shape, - const Shape& stridej, const Shape& stridek, int* j, int* k) { +MSHADOW_XINLINE void unravel_dot(const index_t idx, const Shape& shape, + const Shape& stridej, const Shape& stridek, index_t* j, index_t* k) { *j = 0; *k = 0; #pragma unroll - for (int i = ndim-1, idx_t = idx; i >=0; --i) { - const int tmp = idx_t / shape[i]; - const int coord = idx_t - tmp*shape[i]; + for (index_t i = ndim-1, idx_t = idx; i >=0; --i) { + const auto tmp = idx_t / shape[i]; + const auto coord = idx_t - tmp*shape[i]; *j += coord*stridej[i]; *k += coord*stridek[i]; idx_t = tmp; @@ -68,11 +68,11 @@ MSHADOW_XINLINE void unravel_dot(const int idx, const Shape& shape, } template -MSHADOW_XINLINE Shape unravel(const int idx, const Shape& shape) { +MSHADOW_XINLINE Shape unravel(const index_t idx, const Shape& shape) { Shape ret; #pragma unroll - for (int i = ndim-1, j = idx; i >=0; --i) { - int tmp = j / shape[i]; + for (index_t i = ndim-1, j = idx; i >=0; --i) { + auto tmp = j / shape[i]; ret[i] = j - tmp*shape[i]; j = tmp; } @@ -80,10 +80,10 @@ MSHADOW_XINLINE Shape unravel(const int idx, const Shape& shape) { } template -MSHADOW_XINLINE int ravel(const Shape& coord, const Shape& shape) { - int ret = 0; +MSHADOW_XINLINE index_t ravel(const Shape& coord, const Shape& shape) { + index_t ret = 0; #pragma unroll - for (int i = 0; i < ndim; ++i) { + for (index_t i = 0; i < ndim; ++i) { ret = ret * shape[i] + (shape[i] > 1) * coord[i]; } return ret; @@ -111,12 +111,12 @@ MSHADOW_XINLINE int diff(const Shape& small, const Shape& big, Shape } template -MSHADOW_XINLINE int unravel_dot(const int idx, const Shape& shape, +MSHADOW_XINLINE index_t unravel_dot(const index_t idx, const Shape& shape, const Shape& stride) { - int ret = 0; + index_t ret = 0; #pragma unroll - for (int i = ndim-1, j = idx; i >=0; --i) { - int tmp = j / shape[i]; + for (index_t i = ndim-1, j = idx; i >=0; --i) { + auto tmp = j / shape[i]; ret += (j - tmp*shape[i])*stride[i]; j = tmp; } @@ -124,8 +124,8 @@ MSHADOW_XINLINE int unravel_dot(const int idx, const Shape& shape, } template -MSHADOW_XINLINE int dot(const Shape& coord, const Shape& stride) { - int ret = 0; +MSHADOW_XINLINE index_t dot(const Shape& coord, const Shape& stride) { + index_t ret = 0; #pragma unroll for (int i = 0; i < ndim; ++i) ret += coord[i] * stride[i]; @@ -142,27 +142,27 @@ MSHADOW_XINLINE void assign(DType* dst, const bool addto, const DType src) { } template -MSHADOW_XINLINE void binary_broadcast_assign(const int idx, const bool addto, +MSHADOW_XINLINE void binary_broadcast_assign(const index_t idx, const bool addto, const DType* __restrict lhs, const DType* __restrict rhs, DType* out, const Shape& lshape, const Shape& rshape, const Shape& oshape) { const Shape coord = unravel(idx, oshape); - const int j = ravel(coord, lshape); - const int k = ravel(coord, rshape); + const index_t j = ravel(coord, lshape); + const index_t k = ravel(coord, rshape); assign(&out[idx], addto, OP::Map(lhs[j], rhs[k])); } template -MSHADOW_XINLINE void seq_reduce_assign(const int idx, const int M, const bool addto, +MSHADOW_XINLINE void seq_reduce_assign(const index_t idx, const size_t M, const bool addto, const DType* __restrict big, DType *small, const Shape& bshape, const Shape& sshape, const Shape& rshape, const Shape& rstride) { Shape coord = unravel(idx, sshape); - int j = ravel(coord, bshape); + index_t j = ravel(coord, bshape); DType val, residual; Reducer::SetInitValue(val, residual); - for (int k = 0; k < M; ++k) { + for (size_t k = 0; k < M; ++k) { coord = unravel(k, rshape); Reducer::Reduce(val, OP::Map(big[j + dot(coord, rstride)]), residual); } @@ -176,10 +176,10 @@ MSHADOW_XINLINE void seq_reduce_assign(const int idx, const int M, const bool ad #else template -void binary_broadcast_compute(const int N, const bool addto, const DType *lhs, +void binary_broadcast_compute(const size_t N, const bool addto, const DType *lhs, const DType *rhs, DType *out, const Shape lshape, const Shape rshape, const Shape oshape) { - for (int idx = 0; idx < N; ++idx) { + for (size_t idx = 0; idx < N; ++idx) { binary_broadcast_assign(idx, addto, lhs, rhs, out, lshape, rshape, oshape); } } @@ -188,26 +188,26 @@ template void BinaryBroadcastComputeImpl(Stream *s, const OpReqType req, const TBlob& lhs, const TBlob& rhs, const TBlob& out) { if (req == kNullOp) return; - int N = out.shape_.Size(); + size_t N = out.shape_.Size(); binary_broadcast_compute(N, req == kAddTo, lhs.dptr(), rhs.dptr(), out.dptr(), lhs.shape_.get(), rhs.shape_.get(), out.shape_.get()); } template -void seq_reduce_compute(const int N, const int M, const bool addto, +void seq_reduce_compute(const size_t N, const size_t M, const bool addto, const DType *big, DType *small, const Shape bshape, const Shape sshape, const Shape rshape, const Shape rstride) { #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) - for (int idx = 0; idx < N; ++idx) { + for (index_t idx = 0; idx < static_cast(N); ++idx) { seq_reduce_assign(idx, M, addto, big, small, bshape, sshape, rshape, rstride); } } template -void seq_reduce_compute_extra_mem(const int N, const int M, const bool addto, +void seq_reduce_compute_extra_mem(const size_t N, const size_t M, const bool addto, const DType* big, DType* small, const Shape bshape, const Shape sshape, @@ -215,12 +215,12 @@ void seq_reduce_compute_extra_mem(const int N, const int M, const bool addto, const Shape rstride, const index_t* ws_dptr) { #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) - for (int idx = 0; idx < N; ++idx) { + for (index_t idx = 0; idx < static_cast(N); ++idx) { Shape coord = unravel(idx, sshape); - int j = ravel(coord, bshape); + index_t j = ravel(coord, bshape); DType val, residual; Reducer::SetInitValue(val, residual); - for (int k = 0; k < M; ++k) { + for (size_t k = 0; k < M; ++k) { Reducer::Reduce(val, OP::Map(big[j + ws_dptr[k]]), residual); } assign(&small[idx], addto, val); @@ -233,7 +233,7 @@ void Reduce(Stream* s, const TBlob& small, const OpReqType req, if (req == kNullOp) return; Shape rshape, rstride; diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride); - int N = small.shape_.Size(), M = rshape.Size(); + size_t N = small.shape_.Size(), M = rshape.Size(); seq_reduce_compute( N, M, req == kAddTo, big.dptr(), small.dptr(), big.shape_.get(), small.shape_.get(), rshape, rstride); @@ -247,9 +247,9 @@ void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType req, Shape rshape, rstride; diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride); index_t* ws_dptr = reinterpret_cast(workspace.dptr_); - int N = small.shape_.Size(), M = rshape.Size(); + size_t N = small.shape_.Size(), M = rshape.Size(); #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) - for (int k = 0; k < M; k++) { + for (index_t k = 0; k < static_cast(M); k++) { Shape coord = unravel(k, rshape); ws_dptr[k] = dot(coord, rstride); } @@ -272,7 +272,7 @@ size_t ReduceWorkspaceSize(Stream *s, const TShape& small, const OpReqType } template -MSHADOW_XINLINE void seq_reduce_assign(const int idx, const int M, const bool addto, +MSHADOW_XINLINE void seq_reduce_assign(const index_t idx, const size_t M, const bool addto, const DType* __restrict big, const DType* __restrict lhs, const DType* __restrict rhs, DType *small, const Shape& big_shape, const Shape& lhs_shape0, @@ -282,20 +282,20 @@ MSHADOW_XINLINE void seq_reduce_assign(const int idx, const int M, const bool ad const Shape& rstride, const Shape& lhs_stride, const Shape& rhs_stride) { Shape coord = unravel(idx, small_shape); - const int idx_big0 = ravel(coord, big_shape); - const int idx_lhs0 = ravel(coord, lhs_shape0); - const int idx_rhs0 = ravel(coord, rhs_shape0); + const index_t idx_big0 = ravel(coord, big_shape); + const index_t idx_lhs0 = ravel(coord, lhs_shape0); + const index_t idx_rhs0 = ravel(coord, rhs_shape0); DType val, residual; Reducer::SetInitValue(val, residual); - for (int k = 0; k < M; ++k) { + for (size_t k = 0; k < M; ++k) { Shape coord_big = unravel(k, rshape); - int idx_big = idx_big0 + dot(coord_big, rstride); + index_t idx_big = idx_big0 + dot(coord_big, rstride); Shape coord_lhs = unravel(k, lhs_shape); - int idx_lhs = idx_lhs0 + dot(coord_lhs, lhs_stride); + index_t idx_lhs = idx_lhs0 + dot(coord_lhs, lhs_stride); Shape coord_rhs = unravel(k, rhs_shape); - int idx_rhs = idx_rhs0 + dot(coord_rhs, rhs_stride); + index_t idx_rhs = idx_rhs0 + dot(coord_rhs, rhs_stride); Reducer::Reduce(val, OP1::Map(big[idx_big], OP2::Map(lhs[idx_lhs], rhs[idx_rhs])), residual); } @@ -304,7 +304,7 @@ MSHADOW_XINLINE void seq_reduce_assign(const int idx, const int M, const bool ad } template -void seq_reduce_compute(const int N, const int M, const bool addto, +void seq_reduce_compute(const size_t N, const size_t M, const bool addto, const DType *big, const DType *lhs, const DType *rhs, DType *small, const Shape big_shape, const Shape small_shape, const Shape rshape, const Shape rstride, @@ -312,7 +312,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto, const Shape rhs_shape, const Shape rhs_stride, const Shape& lhs_shape0, const Shape& rhs_shape0) { #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) - for (int idx = 0; idx < N; ++idx) { + for (index_t idx = 0; idx < static_cast(N); ++idx) { seq_reduce_assign(idx, M, addto, big, lhs, rhs, small, big_shape, lhs_shape0, rhs_shape0, small_shape, rshape, lhs_shape, rhs_shape, rstride, lhs_stride, rhs_stride); @@ -326,8 +326,8 @@ void Reduce(Stream *s, const TBlob& small, const OpReqType req, if (req == kNullOp) return; Shape rshape, rstride; diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride); - int N = small.shape_.Size(); - int M = rshape.Size(); + size_t N = small.shape_.Size(); + size_t M = rshape.Size(); Shape lhs_shape, lhs_stride; diff(small.shape_.get(), lhs.shape_.get(), &lhs_shape, &lhs_stride); diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index 77236e068f86..c39418dbe41d 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -36,7 +36,7 @@ struct TakeCPU { // K is the number of rows of in_data // i is the index of out_data template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, const IType* idx, const size_t M, const int64_t K) { int64_t j = static_cast(idx[i]); if (clip) { @@ -420,19 +420,19 @@ inline void SparseEmbeddingOpBackwardRspImpl(const bool deterministic, template inline typename std::enable_if<(!std::is_same::value), void>::type -GatherNDBackwardImpl(int N, int M, int K, +GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, const IType* indices, mshadow::Stream *s) { #pragma omp parallel for - for (int i = 0; i < N; i++) { - int offset = 0; - for (int j = 0; j < M; ++j) { - offset += strides[j] * static_cast(indices[j*N + i]); + for (index_t i = 0; i < N; i++) { + index_t offset = 0; + for (index_t j = 0; j < M; ++j) { + offset += strides[j] * static_cast(indices[j*N + i]); } - for (int j = 0; j < K; ++j) { + for (index_t j = 0; j < K; ++j) { #pragma omp atomic out[offset + j] += data[i * K + j]; } @@ -441,18 +441,18 @@ GatherNDBackwardImpl(int N, int M, int K, template inline typename std::enable_if::value, void>::type -GatherNDBackwardImpl(int N, int M, int K, +GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, const IType* indices, mshadow::Stream *s) { - for (int i = 0; i < N; i++) { - int offset = 0; - for (int j = 0; j < M; ++j) { - offset += strides[j] * static_cast(indices[j*N + i]); + for (index_t i = 0; i < N; i++) { + index_t offset = 0; + for (index_t j = 0; j < M; ++j) { + offset += strides[j] * static_cast(indices[j*N + i]); } - for (int j = 0; j < K; ++j) { + for (index_t j = 0; j < K; ++j) { out[offset + j] += data[i * K + j]; } } diff --git a/src/operator/tensor/indexing_op.cu b/src/operator/tensor/indexing_op.cu index 0d72b1815fde..bad3e5a1a6c5 100644 --- a/src/operator/tensor/indexing_op.cu +++ b/src/operator/tensor/indexing_op.cu @@ -439,22 +439,22 @@ inline void SparseEmbeddingOpBackwardRspImpl(const bool deterministic, struct backward_gather_nd_gpu { template - MSHADOW_XINLINE static void Map(int i, int N, int M, int K, + MSHADOW_XINLINE static void Map(index_t i, index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, const IType* indices) { - int offset = 0; - for (int j = 0; j < M; ++j) { + index_t offset = 0; + for (index_t j = 0; j < M; ++j) { offset += strides[j] * static_cast(indices[j*N + i]); } - for (int j = 0; j < K; ++j) { + for (index_t j = 0; j < K; ++j) { atomicAdd(out + (offset + j), data[i * K + j]); } } }; template -inline void GatherNDBackwardImpl(int N, int M, int K, +inline void GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index 92b6e21018e5..fba331e25705 100644 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -314,7 +314,8 @@ struct Take { * \param axis axis id */ template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, const IType* idx, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, + const IType* idx, const mshadow::Shape<10> in_stride, const mshadow::Shape<10> out_stride, const int in_ndims, const int out_ndims, const int idx_ndims, @@ -361,7 +362,7 @@ struct TakeRspKernel { * \param nnr number of non-zero rows */ template - MSHADOW_XINLINE static void Map(int i, + MSHADOW_XINLINE static void Map(index_t i, const IType* data, DType* out, const RType* weight_idx, @@ -1395,15 +1396,15 @@ inline bool ScatterNDType(const nnvm::NodeAttrs& attrs, struct scatter_nd { template - MSHADOW_XINLINE static void Map(int i, OpReqType req, int N, int M, int K, + MSHADOW_XINLINE static void Map(index_t i, OpReqType req, index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, const IType* indices) { - int offset = 0; - for (int j = 0; j < M; ++j) { - offset += strides[j] * static_cast(indices[j*N + i]); + index_t offset = 0; + for (index_t j = 0; j < M; ++j) { + offset += strides[j] * static_cast(indices[j*N + i]); } - for (int j = 0; j < K; ++j) { + for (index_t j = 0; j < K; ++j) { KERNEL_ASSIGN(out[offset+j], req, data[i*K + j]); } } @@ -1416,17 +1417,18 @@ void ScatterNDForward(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { using namespace mshadow; + using nnvm::dim_t; CHECK_EQ(inputs.size(), 2U); CHECK_EQ(outputs.size(), 1U); if (req[0] == kNullOp) return; mshadow::Stream *s = ctx.get_stream(); const TShape& oshape = outputs[0].shape_; const TShape& ishape = inputs[1].shape_; - int M = ishape[0]; - int N = ishape.Size() / M; - int K = oshape.ProdShape(M, oshape.ndim()); + dim_t M = ishape[0]; + dim_t N = ishape.Size() / M; + dim_t K = oshape.ProdShape(M, oshape.ndim()); mshadow::Shape<10> strides; - for (int i = M-1, stride = K; i >= 0; stride *= oshape[i], --i) strides[i] = stride; + for (dim_t i = M-1, stride = K; i >= 0; stride *= oshape[i], --i) strides[i] = stride; if (kWriteTo == req[0]) { Fill(s, outputs[0], req[0], 0); } @@ -1441,7 +1443,7 @@ void ScatterNDForward(const nnvm::NodeAttrs& attrs, template inline typename std::enable_if<(!std::is_same::value), void>::type -GatherNDBackwardImpl(int N, int M, int K, +GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, @@ -1450,7 +1452,7 @@ GatherNDBackwardImpl(int N, int M, int K, template inline typename std::enable_if::value, void>::type -GatherNDBackwardImpl(int N, int M, int K, +GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, @@ -1458,7 +1460,7 @@ GatherNDBackwardImpl(int N, int M, int K, mshadow::Stream *s); template -inline void GatherNDBackwardImpl(int N, int M, int K, +inline void GatherNDBackwardImpl(index_t N, index_t M, index_t K, const mshadow::Shape<10> strides, DType* out, const DType* data, @@ -1472,17 +1474,18 @@ void GatherNDBackward(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { using namespace mshadow; + using nnvm::dim_t; CHECK_EQ(inputs.size(), 2U); CHECK_EQ(outputs.size(), 1U); if (req[0] == kNullOp) return; mshadow::Stream *s = ctx.get_stream(); const TShape& oshape = outputs[0].shape_; const TShape& ishape = inputs[1].shape_; - int M = ishape[0]; - int N = ishape.Size() / M; - int K = oshape.ProdShape(M, oshape.ndim()); + dim_t M = ishape[0]; + dim_t N = ishape.Size() / M; + dim_t K = oshape.ProdShape(M, oshape.ndim()); mshadow::Shape<10> strides; - for (int i = M-1, stride = K; i >= 0; stride *= oshape[i], --i) strides[i] = stride; + for (dim_t i = M-1, stride = K; i >= 0; stride *= oshape[i], --i) strides[i] = stride; if (kWriteTo == req[0]) { Fill(s, outputs[0], req[0], 0); } diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index 4e52b087f10a..e9e67cb1a4c5 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -453,7 +453,7 @@ void EyeFill(const nnvm::NodeAttrs& attrs, struct range_fwd { template - MSHADOW_XINLINE static void Map(int i, int repeat, DType start, DType step, + MSHADOW_XINLINE static void Map(index_t i, int repeat, DType start, DType step, int req, DType* out) { KERNEL_ASSIGN(out[i], req, start + (i/repeat) * step); } @@ -471,8 +471,8 @@ void RangeCompute(const nnvm::NodeAttrs& attrs, MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { // Force unsigned params to take two's complement form on ARM to ensure consistency with x86 // results. Casting negative floats to unsigned types is undefined in the CPP standard. - auto step = std::is_signed() ? param.step : static_cast(param.step); - auto start = std::is_signed() ? param.start : static_cast(param.start); + auto step = std::is_signed() ? param.step : static_cast(param.step); + auto start = std::is_signed() ? param.start : static_cast(param.start); Kernel::Launch(s, outputs[0].Size(), static_cast(param.repeat), diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 9c81d87464de..3b229cf38eba 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -626,9 +626,9 @@ inline void GetIndexRange(const TShape& dshape, const nnvm::Tuple>& param_begin, const nnvm::Tuple>& param_end, const nnvm::Tuple>& param_step, - common::StaticArray* begin, - common::StaticArray* end, - common::StaticArray* step) { + common::StaticArray* begin, + common::StaticArray* end, + common::StaticArray* step) { CHECK_NE(dshape.ndim(), 0U); CHECK_LE(param_begin.ndim(), dshape.ndim()) << "Slicing axis exceeds data dimensions"; @@ -646,8 +646,8 @@ inline void GetIndexRange(const TShape& dshape, } for (index_t i = 0; i < param_begin.ndim(); ++i) { - int b = 0, e = dshape[i], s = 1; - const int len = dshape[i]; + index_t b = 0, e = dshape[i], s = 1; + const index_t len = dshape[i]; if (param_step.ndim() != 0U) { const auto& opt_step_val = param_step[i]; if (opt_step_val.has_value()) { @@ -724,7 +724,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, TShape oshape = dshape; MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step); for (index_t i = 0; i < param.begin.ndim(); ++i) { const int b = begin[i], e = end[i], s = step[i]; @@ -743,19 +743,19 @@ template struct slice_forward { // i is the i-th row after flattening out into 2D tensor template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* data, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* data, const mshadow::Shape dshape, const mshadow::Shape oshape, - const common::StaticArray begin, - const common::StaticArray step) { - const int data_last_dim_size = dshape[ndim-1]; - const int out_last_dim_size = oshape[ndim-1]; - const int step_last_dim = step[ndim-1]; - const int begin_last_dim = begin[ndim-1]; - const int j = i % out_last_dim_size; - int irow = 0; // row id of flattend 2D data - int stride = 1; - int idx = i / out_last_dim_size; + const common::StaticArray begin, + const common::StaticArray step) { + const index_t data_last_dim_size = dshape[ndim-1]; + const index_t out_last_dim_size = oshape[ndim-1]; + const index_t step_last_dim = step[ndim-1]; + const index_t begin_last_dim = begin[ndim-1]; + const index_t j = i % out_last_dim_size; + index_t irow = 0; // row id of flattend 2D data + index_t stride = 1; + index_t idx = i / out_last_dim_size; #pragma unroll for (int k = ndim - 2; k >= 0; --k) { irow += stride * ((idx % oshape[k]) * step[k] + begin[k]); @@ -771,20 +771,20 @@ template struct slice_forward { // i is the i-th row after flattening out into 2D tensor template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* data, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* data, const mshadow::Shape dshape, const mshadow::Shape oshape, - const common::StaticArray begin, - const common::StaticArray step) { - const int data_last_dim_size = dshape[ndim-1]; - const int out_last_dim_size = oshape[ndim-1]; - const int step_last_dim = step[ndim-1]; - const int begin_last_dim = begin[ndim-1]; - int out_offset = i * out_last_dim_size; - for (int j = 0; j < out_last_dim_size; ++j) { - int irow = 0; // row id of flattend 2D data - int stride = 1; - int idx = i; + const common::StaticArray begin, + const common::StaticArray step) { + const index_t data_last_dim_size = dshape[ndim-1]; + const index_t out_last_dim_size = oshape[ndim-1]; + const index_t step_last_dim = step[ndim-1]; + const index_t begin_last_dim = begin[ndim-1]; + index_t out_offset = i * out_last_dim_size; + for (index_t j = 0; j < out_last_dim_size; ++j) { + index_t irow = 0; // row id of flattend 2D data + index_t stride = 1; + index_t idx = i; #pragma unroll for (int k = ndim - 2; k >= 0; --k) { irow += stride * ((idx % oshape[k]) * step[k] + begin[k]); @@ -813,11 +813,11 @@ void SliceOpForward(const nnvm::NodeAttrs& attrs, const TBlob& out = outputs[0]; const SliceParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - int num_threads = out.shape_.FlatTo2D()[0]; + size_t num_threads = out.shape_.FlatTo2D()[0]; if (std::is_same::value) { num_threads *= out.shape_.get()[ndim - 1]; } @@ -836,20 +836,20 @@ template struct slice_assign { // i is the i-th row after flattening out into 2D tensor template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* val, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* val, const mshadow::Shape oshape, const mshadow::Shape vshape, - const common::StaticArray begin, - const common::StaticArray step) { - const int data_last_dim_size = oshape[ndim-1]; - const int out_last_dim_size = vshape[ndim-1]; - const int step_last_dim = step[ndim-1]; - const int begin_last_dim = begin[ndim-1]; - int offset = i * out_last_dim_size; - for (int j = 0; j < out_last_dim_size; ++j) { - int irow = 0; // row id of flattend 2D out - int stride = 1; - int idx = i; + const common::StaticArray begin, + const common::StaticArray step) { + const index_t data_last_dim_size = oshape[ndim-1]; + const index_t out_last_dim_size = vshape[ndim-1]; + const index_t step_last_dim = step[ndim-1]; + const index_t begin_last_dim = begin[ndim-1]; + index_t offset = i * out_last_dim_size; + for (index_t j = 0; j < out_last_dim_size; ++j) { + index_t irow = 0; // row id of flattend 2D out + index_t stride = 1; + index_t idx = i; #pragma unroll for (int k = ndim - 2; k >= 0; --k) { irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); @@ -866,19 +866,19 @@ template struct slice_assign { // i is the i-th row after flattening out into 2D tensor template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* val, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* val, const mshadow::Shape oshape, const mshadow::Shape vshape, - const common::StaticArray begin, - const common::StaticArray step) { - const int data_last_dim_size = oshape[ndim-1]; - const int out_last_dim_size = vshape[ndim-1]; - const int step_last_dim = step[ndim-1]; - const int begin_last_dim = begin[ndim-1]; - const int j = i % out_last_dim_size; - int irow = 0; // row id of flattend 2D out - int stride = 1; - int idx = i / out_last_dim_size; + const common::StaticArray begin, + const common::StaticArray step) { + const index_t data_last_dim_size = oshape[ndim-1]; + const index_t out_last_dim_size = vshape[ndim-1]; + const index_t step_last_dim = step[ndim-1]; + const index_t begin_last_dim = begin[ndim-1]; + const index_t j = i % out_last_dim_size; + index_t irow = 0; // row id of flattend 2D out + index_t stride = 1; + index_t idx = i / out_last_dim_size; #pragma unroll for (int k = ndim - 2; k >= 0; --k) { irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); @@ -911,7 +911,7 @@ void SliceOpBackward(const nnvm::NodeAttrs& attrs, LOG(FATAL) << "_slice_backward does not support kWriteInplace"; } MXNET_NDIM_SWITCH(ograd.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(igrad.shape_, param.begin, param.end, param.step, &begin, &end, &step); MSHADOW_TYPE_SWITCH(ograd.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { @@ -937,7 +937,7 @@ inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs, TShape vshape = dshape; // vshape is the value shape on the right hand side const SliceParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step); for (index_t i = 0; i < param.begin.ndim(); ++i) { const int b = begin[i], e = end[i], s = step[i]; @@ -975,7 +975,7 @@ void SliceAssignOpForward(const nnvm::NodeAttrs& attrs, const SliceParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { @@ -1024,20 +1024,20 @@ template struct slice_assign_scalar { // i is the i-th row after flattening out into 2D tensor template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType val, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType val, const OpReqType req, const mshadow::Shape oshape, const mshadow::Shape vshape, - const common::StaticArray begin, - const common::StaticArray step) { - const int data_last_dim_size = oshape[ndim-1]; - const int out_last_dim_size = vshape[ndim-1]; - const int step_last_dim = step[ndim-1]; - const int begin_last_dim = begin[ndim-1]; - for (int j = 0; j < out_last_dim_size; ++j) { - int irow = 0; // row id of flattend 2D out - int stride = 1; - int idx = i; + const common::StaticArray begin, + const common::StaticArray step) { + const index_t data_last_dim_size = oshape[ndim-1]; + const index_t out_last_dim_size = vshape[ndim-1]; + const index_t step_last_dim = step[ndim-1]; + const index_t begin_last_dim = begin[ndim-1]; + for (index_t j = 0; j < out_last_dim_size; ++j) { + index_t irow = 0; // row id of flattend 2D out + index_t stride = 1; + index_t idx = i; #pragma unroll for (int k = ndim - 2; k >= 0; --k) { irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); @@ -1076,7 +1076,7 @@ void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs, TShape vshape = data.shape_; const SliceAssignScalarParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); for (index_t i = 0; i < param.begin.ndim(); ++i) { const int b = begin[i], e = end[i], s = step[i]; @@ -1107,7 +1107,7 @@ struct SliceAxisParam : public dmlc::Parameter { }; inline void GetSliceAxisParams(const SliceAxisParam& param, const TShape& ishape, - int* axis, int* begin, int* end) { + int* axis, index_t* begin, index_t* end) { *axis = param.axis; if (*axis < 0) { *axis += static_cast(ishape.ndim()); @@ -1115,7 +1115,7 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const TShape& ishape CHECK(*axis < static_cast(ishape.ndim()) && *axis >= 0) << "Transformed axis must be smaller than the source ndim and larger than zero! Recieved axis=" << param.axis << ", src_ndim=" << ishape.ndim() << ", transformed axis=" << *axis; - int axis_size = static_cast(ishape[*axis]); + index_t axis_size = static_cast(ishape[*axis]); *begin = param.begin; *end = -1; if (*begin < 0) { @@ -1149,7 +1149,8 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); TShape& ishape = (*in_attrs)[0]; - int axis, begin, end; + int axis; + index_t begin, end; GetSliceAxisParams(param, ishape, &axis, &begin, &end); TShape shape(ishape.ndim()); for (index_t i = 0; i < ishape.ndim(); ++i) { @@ -1173,7 +1174,8 @@ void SliceAxis(const nnvm::NodeAttrs& attrs, using namespace mshadow::expr; const SliceAxisParam& param = nnvm::get(attrs.parsed); mshadow::Stream *s = ctx.get_stream(); - int axis, begin, end; + int axis; + index_t begin, end; GetSliceAxisParams(param, inputs[0].shape_, &axis, &begin, &end); int ndim = static_cast(outputs[0].ndim()); @@ -1207,7 +1209,8 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, using namespace mshadow::op; using namespace mshadow::expr; mshadow::Stream *s = ctx.get_stream(); - int axis, begin, end; + int axis; + index_t begin, end; GetSliceAxisParams(param, outputs[0].shape_, &axis, &begin, &end); int ndim = static_cast(outputs[0].shape_.ndim()); @@ -1354,7 +1357,7 @@ void SliceLikeForward(const nnvm::NodeAttrs& attrs, SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); MXNET_NDIM_SWITCH(data.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(data.shape_, param_begin, param_end, param_step, &begin, &end, &step); MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { @@ -1400,7 +1403,7 @@ void SliceLikeBackward(const nnvm::NodeAttrs& attrs, SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); MXNET_NDIM_SWITCH(ograd.ndim(), ndim, { - common::StaticArray begin, end, step; + common::StaticArray begin, end, step; GetIndexRange(ograd.shape_, param_begin, param_end, param_step, &begin, &end, &step); MSHADOW_TYPE_SWITCH(ograd.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { @@ -1429,7 +1432,7 @@ struct ClipParam : public dmlc::Parameter { struct clip { template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* datas, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* datas, DType a_min, DType a_max) { DType data = datas[i]; if (data > a_max) { @@ -1445,7 +1448,7 @@ struct clip { struct clip_grad { template - MSHADOW_XINLINE static void Map(int i, DType* out, const DType* grad, const DType* datas, + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* grad, const DType* datas, DType a_min, DType a_max) { DType data = datas[i]; if (data > a_max) { @@ -1934,7 +1937,7 @@ struct reverse { } #ifdef __CUDACC__ template - __device__ static void Map(int index, index_t nreversedim, const DType *src, DType *dst, + __device__ static void Map(index_t index, index_t nreversedim, const DType *src, DType *dst, const index_t * stride_, const index_t * trailing_) { __shared__ index_t stride_share[REVERSE_MAX_DIM]; @@ -1949,7 +1952,7 @@ struct reverse { } #else template - MSHADOW_XINLINE static void Map(int index, index_t nreversedim, const DType *src, DType *dst, + MSHADOW_XINLINE static void Map(index_t index, index_t nreversedim, const DType *src, DType *dst, const index_t * stride_, const index_t * trailing_) { index_t new_idx = ReverseIndex(index, nreversedim, stride_, trailing_); @@ -2141,10 +2144,10 @@ struct SqueezeParam : public dmlc::Parameter { // move all the zeros to the last of the shape array // and keep the relative order of the non-zero values. // Returns the new shape size after moving all zeros to the end. -inline uint32_t SqueezeShapeHelper(TShape* shape) { +inline size_t SqueezeShapeHelper(TShape* shape) { CHECK(shape != nullptr); - uint32_t count = 0; - for (uint32_t i = 0; i < shape->ndim(); ++i) { + size_t count = 0; + for (size_t i = 0; i < shape->ndim(); ++i) { if ((*shape)[i] == 0) { ++count; } else { @@ -2167,7 +2170,7 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, if (param.axis.has_value()) { // preprocess axis TShape axes = param.axis.value(); - for (uint32_t i = 0; i < axes.ndim(); ++i) { + for (size_t i = 0; i < axes.ndim(); ++i) { if (axes[i] < 0) { axes[i] += dndim; CHECK_GE(axes[i], 0) @@ -2182,11 +2185,11 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, oshape[axes[i]] = 0; } } else { - for (uint32_t i = 0; i < oshape.ndim(); ++i) { + for (size_t i = 0; i < oshape.ndim(); ++i) { if (oshape[i] == 1) oshape[i] = 0; } } - uint32_t oshape_size = SqueezeShapeHelper(&oshape); + size_t oshape_size = SqueezeShapeHelper(&oshape); if (oshape_size == 0) { // corner case when dshape is (1, 1, 1, 1) oshape[0] = 1; oshape_size = 1; @@ -2229,7 +2232,7 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, expected_out[0] = in_shape[0]; expected_out[1] = in_shape[1] / (block * block); - uint32_t i = 2; + size_t i = 2; while (i < expected_out.ndim()) { expected_out[i] = in_shape[i] * block; ++i; @@ -2259,9 +2262,9 @@ inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, * \param inp_index index within input tensor from where value is retrieved * \param offset_arr array containing the linear offset of input tensor */ -MSHADOW_XINLINE void update_index(int index_position, int dim_size, int *idx, - int *inp_index, const int* offset_arr) { - int next_idx_val = *idx / dim_size; +MSHADOW_XINLINE void update_index(index_t index_position, index_t dim_size, index_t *idx, + index_t *inp_index, const index_t* offset_arr) { + index_t next_idx_val = *idx / dim_size; *inp_index += (*idx - next_idx_val * dim_size) * offset_arr[index_position]; *idx = next_idx_val; } @@ -2280,9 +2283,9 @@ MSHADOW_XINLINE void update_index(int index_position, int dim_size, int *idx, template struct depth_to_space_forward { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, - const int block, const int* size, const int* offset_arr) { - int inp_index = 0, idx = i, dim_size; + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, + const int block, const index_t* size, const index_t* offset_arr) { + index_t inp_index = 0, idx = i, dim_size; dim_size = block; update_index(2, dim_size, &idx, &inp_index, offset_arr); dim_size = size[3]; @@ -2315,9 +2318,9 @@ struct depth_to_space_forward { template struct compute_offset_for_depth_to_space { template - MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, const int32_t size2, - const int32_t size3) { + MSHADOW_XINLINE static void Map(index_t i, DType* offset_arr, DType* size, const int block, + const index_t size0, const index_t size1, const index_t size2, + const index_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2349,10 +2352,10 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, int block = param.block_size; mshadow::Tensor workspace = - ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); + ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(index_t) * 10), s); char* workspace_curr_ptr = workspace.dptr_; - int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); - int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t) * 6); + index_t* offset_arr = reinterpret_cast(workspace_curr_ptr); + index_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(index_t) * 6); MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { @@ -2431,9 +2434,9 @@ inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, template struct space_to_depth_forward { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, const int block, - const int* size, const int* offset_arr) { - int inp_index = 0, idx = i, dim_size; + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, const int block, + const index_t* size, const index_t* offset_arr) { + index_t inp_index = 0, idx = i, dim_size; dim_size = size[3] / block; update_index(4, dim_size, &idx, &inp_index, offset_arr); dim_size = size[2] / block; @@ -2466,9 +2469,9 @@ struct space_to_depth_forward { template struct compute_offset_for_space_to_depth { template - MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, - const int32_t size2, const int32_t size3) { + MSHADOW_XINLINE static void Map(index_t i, DType* offset_arr, DType* size, const int block, + const index_t size0, const index_t size1, + const index_t size2, const index_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2500,10 +2503,10 @@ void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, int block = param.block_size; mshadow::Tensor workspace = - ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); + ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(index_t) * 10), s); char* workspace_curr_ptr = workspace.dptr_; - int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); - int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t) * 6); + index_t* offset_arr = reinterpret_cast(workspace_curr_ptr); + index_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(index_t) * 6); MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py index 121acc174b51..3f2cf35daae1 100644 --- a/tests/nightly/test_large_array.py +++ b/tests/nightly/test_large_array.py @@ -17,18 +17,53 @@ import unittest import mxnet as mx +import numpy as np from mxnet import gluon, nd +# dimension constants +MEDIUM_X = 10000 +LARGE_X = MEDIUM_X * MEDIUM_X +SMALL_Y = 50 +LARGE_SIZE = LARGE_X * SMALL_Y class TestLargeArray(unittest.TestCase): - def test_ndarray2numpy(self): - m = gluon.nn.Embedding(14000, 128) + def test_gluon_embedding(self): + m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X) m.initialize() - ind = nd.zeros((700000, 128)) - x = m(ind) - x.shape - test = x.asnumpy() - assert (x.shape == test.shape) + a = nd.zeros((MEDIUM_X, SMALL_Y)) + b = m(a) + assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X) + assert b.asnumpy().size == LARGE_SIZE + + def test_ndarray_zeros(self): + a = nd.zeros(shape=(LARGE_X, SMALL_Y)) + assert a[-1][0] == 0 + assert a.shape == (LARGE_X, SMALL_Y) + assert a.size == LARGE_SIZE + + def test_ndarray_ones(self): + a = nd.ones(shape=(LARGE_X, SMALL_Y)) + assert a[-1][0] == 1 + assert nd.sum(a).asnumpy() == LARGE_SIZE + + def test_ndarray_zeros2(self): + a = nd.zeros(shape=(LARGE_SIZE)) + assert a[LARGE_SIZE-1] == 0 + assert a.shape == (LARGE_SIZE,) + + def test_ndarray_arange(self): + a = nd.arange(0, LARGE_SIZE, dtype='int64') + assert a[-1] == LARGE_SIZE - 1 + assert nd.slice(a, begin=-2, end=-1) == (LARGE_SIZE - 2) + + def test_ndarray_random_uniform(self): + a = nd.random.uniform(shape=(LARGE_X, SMALL_Y)) + assert a[-1][0] != 0 + + def test_ndarray_empty(self): + a = np.empty((LARGE_SIZE,)) + b = nd.array(a) + assert b.shape == (LARGE_SIZE,) if __name__ == '__main__': unittest.main()