7 changes: 7 additions & 0 deletions include/mxnet/c_api.h
@@ -483,6 +483,13 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym,
//--------------------------------------------
// Part 4: Executor interface
//--------------------------------------------
/*!
 * \brief Print the content of the execution plan, used for debugging.
 * \param handle the executor.
 * \param out_str pointer to hold the output string of the printing.
 * \return 0 when success, -1 when failure happens
 */
MXNET_DLL int MXExecutorPrint(ExecutorHandle handle, const char **out_str);
/*!
* \brief Executor forward method
*
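For context, a minimal usage sketch of the new entry point, assuming a valid ExecutorHandle obtained elsewhere; the helper name DumpExecPlan is illustrative, not part of this change:

#include <cstdio>
#include <mxnet/c_api.h>

// Illustrative helper, not part of this change: dump an executor's
// execution plan to stdout. `exec` must be a valid ExecutorHandle.
void DumpExecPlan(ExecutorHandle exec) {
  const char *plan = nullptr;
  if (MXExecutorPrint(exec, &plan) == 0) {
    // The string is owned by MXNet's thread-local store; do not free it.
    std::printf("%s\n", plan);
  }
}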
22 changes: 13 additions & 9 deletions include/mxnet/resource.h
@@ -20,21 +20,19 @@ struct ResourceRequest {
enum Type {
/*! \brief mshadow::Random<xpu> object */
kRandom,
/*! \brief Temporal space */
/*! \brief A dynamic temp space that can be of arbitrary size */
kTempSpace
};
/*! \brief type of resources */
Type type;
/*! \brief size of space requested, in terms of number of reals */
size_t space_num_reals;
/*! \brief default constructor */
ResourceRequest() {}
/*!
* \brief constructor, allow implicit conversion
* \param type type of resources
*/
ResourceRequest(Type type, size_t space_num_reals = 0) // NOLINT(*)
: type(type), space_num_reals(space_num_reals) {}
ResourceRequest(Type type) // NOLINT(*)
: type(type) {}
};


@@ -48,11 +46,15 @@ struct Resource {
ResourceRequest req;
/*! \brief engine variable */
engine::VarHandle var;
/*! \brief identifier of the resource, used for debugging purposes */
int32_t id;
/*!
* \brief pointer to the resource, do not use directly,
* access using member functions
*/
void *ptr_;
/*! \brief default constructor */
Resource() : id(0) {}
/*!
* \brief Get random number generator.
* \param stream the stream to use in the random number generator.
@@ -70,7 +72,8 @@
}
/*!
* \brief Get space requested as mshadow Tensor.
* The resulting tensor must fit in the space requested.
* The caller can request arbitrary size.
*
* \param shape the shape of the returned tensor.
* \param stream the stream of the returned tensor.
* \return the mshadow tensor requested.
@@ -81,9 +84,11 @@
inline mshadow::Tensor<xpu, ndim, real_t> get_space(
mshadow::Shape<ndim> shape, mshadow::Stream<xpu> *stream) const {
CHECK_EQ(req.type, ResourceRequest::kTempSpace);
CHECK_GE(req.space_num_reals, shape.Size());
mshadow::TensorContainer<xpu, 1, real_t> *space =
static_cast<mshadow::TensorContainer<xpu, 1, real_t>*>(ptr_);
space->Resize(mshadow::Shape1(shape.Size()));
return mshadow::Tensor<xpu, ndim, real_t>(
static_cast<real_t*>(ptr_), shape, shape[ndim - 1], stream);
space->dptr_, shape, shape[ndim - 1], stream);
}
};

@@ -97,7 +102,6 @@ class ResourceManager {
* \return the requested resource.
* \note The returned resource's ownership is
* still held by the manager singleton.
*
*/
virtual Resource Request(Context ctx, const ResourceRequest &req) = 0;
/*!
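A minimal sketch of how an operator would use the revised interface: request kTempSpace with no up-front size, then ask for whatever shape is needed at run time. The function name and shapes below are illustrative assumptions, not part of this diff:

#include <mxnet/resource.h>

// Illustrative only: obtain scratch memory of a run-time-decided shape.
// `res` is assumed to be a Resource granted for a kTempSpace request.
template<typename xpu>
void UseTempSpace(const mxnet::Resource &res, mshadow::Stream<xpu> *stream) {
  // No size was declared in the ResourceRequest; get_space resizes the
  // backing TensorContainer on demand.
  mshadow::Tensor<xpu, 2, mxnet::real_t> workspace =
      res.get_space(mshadow::Shape2(128, 256), stream);
  // ... use `workspace` as temporary storage ...
}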
5 changes: 5 additions & 0 deletions include/mxnet/symbolic.h
@@ -400,6 +400,11 @@ class Executor {
* \param head_grads the gradient of head nodes to be backproped.
*/
virtual void Backward(const std::vector<NDArray> &head_grads) = 0;
/*!
* \brief print the execution plan info to the output stream.
* \param os the output stream to print to.
*/
virtual void Print(std::ostream &os) const {} // NOLINT(*)
/*!
* \brief get array of outputs in the executor.
* \return array of outputs in the executor.
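The C API above is a thin wrapper over this virtual. A sketch of the same capture done directly in C++, mirroring what MXExecutorPrint does internally; the function name is illustrative:

#include <sstream>
#include <string>
#include <mxnet/symbolic.h>

// Sketch: render an executor's plan into a std::string.
// `exec` is assumed to be a valid, bound executor.
std::string ExecPlanToString(const mxnet::Executor *exec) {
  std::ostringstream os;
  exec->Print(os);
  return os.str();
}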
17 changes: 15 additions & 2 deletions python/mxnet/executor.py
@@ -5,8 +5,8 @@

import ctypes
from .base import _LIB
from .base import c_array, mx_uint, NDArrayHandle, ExecutorHandle
from .base import check_call
from .base import mx_uint, NDArrayHandle, ExecutorHandle
from .base import check_call, c_array, py_str
from .ndarray import NDArray

class Executor(object):
@@ -81,6 +81,19 @@ def backward(self, head_grads=None):
ndarray = c_array(NDArrayHandle, [item.handle for item in head_grads])
check_call(_LIB.MXExecutorBackward(self.handle, len(head_grads), ndarray))

def debug_str(self):
"""Get a debug string about internal execution plan.

Returns
-------
debug_str : string
Debug string of the executor.
"""
debug_str = ctypes.c_char_p()
check_call(_LIB.MXExecutorPrint(
self.handle, ctypes.byref(debug_str)))
return py_str(debug_str.value)

@property
def outputs(self):
"""list all heads' output ndarray
11 changes: 11 additions & 0 deletions src/c_api.cc
@@ -684,6 +684,17 @@ int MXSymbolInferShape(SymbolHandle sym,
API_END();
}

int MXExecutorPrint(ExecutorHandle handle, const char **out_str) {
Executor *exec = static_cast<Executor*>(handle);
MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
API_BEGIN();
std::ostringstream os;
exec->Print(os);
ret->ret_str = os.str();
*out_str = (ret->ret_str).c_str();
API_END();
}

int MXExecutorForward(ExecutorHandle handle, bool is_train) {
API_BEGIN();
Executor *exec = static_cast<Executor*>(handle);
3 changes: 2 additions & 1 deletion src/engine/stream_manager.h
@@ -75,8 +75,9 @@ template <std::size_t kNumGpus, std::size_t kStreams>
RunContext StreamManager<kNumGpus, kStreams>::GetIORunContext(
Context const& ctx) {
RunContext ret;
ret.stream = nullptr;
switch (ctx.dev_mask) {
case cpu::kDevMask: ret.stream = nullptr; break;
case cpu::kDevMask: break;
case gpu::kDevMask: {
#if MXNET_USE_CUDA
CUDA_CALL(cudaSetDevice(ctx.dev_id));
4 changes: 1 addition & 3 deletions src/operator/batch_norm-inl.h
@@ -238,9 +238,7 @@ class BatchNormProp : public OperatorProperty {

std::vector<ResourceRequest> BackwardResource(
const std::vector<TShape> &in_shape) const override {
const TShape &dshape = in_shape[0];
size_t nspace = dshape[1] * 3;
return {{ResourceRequest::kTempSpace, nspace}};
return {ResourceRequest::kTempSpace};
}

int NumVisibleOutputs() const override {
4 changes: 2 additions & 2 deletions src/operator/convolution-inl.h
@@ -348,12 +348,12 @@ class ConvolutionProp : public OperatorProperty {

virtual std::vector<ResourceRequest> ForwardResource(
const std::vector<TShape> &in_shape) const {
return {{ResourceRequest::kTempSpace, param_.workspace}};
return {ResourceRequest::kTempSpace};
}

virtual std::vector<ResourceRequest> BackwardResource(
const std::vector<TShape> &in_shape) const {
return {{ResourceRequest::kTempSpace, param_.workspace}};
return {ResourceRequest::kTempSpace};
}

Operator* CreateOperator(Context ctx) const;
103 changes: 85 additions & 18 deletions src/resource.cc
@@ -4,9 +4,12 @@
* \brief Implementation of resource manager.
*/
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/base.h>
#include <mxnet/engine.h>
#include <mxnet/resource.h>
#include <limits>
#include <atomic>
#include "./common/lazy_alloc_array.h"

namespace mxnet {
@@ -15,10 +18,15 @@ namespace resource {
// implements resource manager
class ResourceManagerImpl : public ResourceManager {
public:
ResourceManagerImpl() : global_seed_(0) {
ResourceManagerImpl() noexcept(false)
: global_seed_(0) {
cpu_temp_space_copy_ = dmlc::GetEnv("MXNET_CPU_TEMP_COPY", 16);
gpu_temp_space_copy_ = dmlc::GetEnv("MXNET_GPU_TEMP_COPY", 4);
engine_ref_ = Engine::_GetSharedRef();
cpu_rand_ = new ResourceRandom<cpu>(
Context(cpu::kDevMask, 0), global_seed_);
cpu_space_ = new ResourceTempSpace<cpu>(
Context(cpu::kDevMask, 0), cpu_temp_space_copy_);
}
~ResourceManagerImpl() {
// need explicit delete, before the engine gets killed
@@ -32,21 +40,31 @@ class ResourceManagerImpl : public ResourceManager {

// request resources
Resource Request(Context ctx, const ResourceRequest &req) override {
if (req.type == ResourceRequest::kRandom) {
if (ctx.dev_mask == cpu::kDevMask) {
return cpu_rand_->resource;
} else {
CHECK_EQ(ctx.dev_mask, gpu::kDevMask);
if (ctx.dev_mask == cpu::kDevMask) {
switch (req.type) {
case ResourceRequest::kRandom: return cpu_rand_->resource;
case ResourceRequest::kTempSpace: return cpu_space_->GetNext();
default: LOG(FATAL) << "Unsupported resource request type " << req.type;
}
} else {
CHECK_EQ(ctx.dev_mask, gpu::kDevMask);
#if MSHADOW_USE_CUDA
return gpu_rand_.Get(ctx.dev_id, [ctx, this]() {
return new ResourceRandom<gpu>(ctx, global_seed_);
})->resource;
switch (req.type) {
case ResourceRequest::kRandom: {
return gpu_rand_.Get(ctx.dev_id, [ctx, this]() {
return new ResourceRandom<gpu>(ctx, global_seed_);
})->resource;
}
case ResourceRequest::kTempSpace: {
return gpu_space_.Get(ctx.dev_id, [ctx, this]() {
return new ResourceTempSpace<gpu>(ctx, gpu_temp_space_copy_);
})->GetNext();
}
default: LOG(FATAL) << "Unsupported resource request type " << req.type;
}
#else
LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
}
} else {
LOG(FATAL) << "Unknown supported type " << req.type;
}
Resource ret;
return ret;
@@ -67,16 +85,13 @@
static constexpr std::size_t kMaxNumGPUs = 16;
/*! \brief Random number magic number to seed different random numbers */
static constexpr uint32_t kRandMagic = 127UL;
/*! \brief Reference to the engine */
std::shared_ptr<Engine> engine_ref_;

// the random number resources
template<typename xpu>
struct ResourceRandom {
/*! \brief pointer to PRNG */
mshadow::Random<xpu> *prnd;
/*! \brief the context of the PRNG */
Context ctx;
/*! \brief pointer to PRNG */
mshadow::Random<xpu> *prnd;
/*! \brief resource representation */
Resource resource;
/*! \brief constructor */
@@ -103,13 +118,65 @@
}, ctx, {}, {resource.var});
}
};
// temporary space resource.
template<typename xpu>
struct ResourceTempSpace {
/*! \brief the context of the device */
Context ctx;
/*! \brief the underlying space */
std::vector<mshadow::TensorContainer<xpu, 1, real_t>*> space;
/*! \brief resource representation */
std::vector<Resource> resource;
/*! \brief current pointer for the round-robin allocator */
std::atomic<size_t> curr_ptr;
/*! \brief constructor */
explicit ResourceTempSpace(Context ctx, size_t ncopy)
: ctx(ctx), space(ncopy), resource(ncopy), curr_ptr(0) {
mshadow::SetDevice<xpu>(ctx.dev_id);
for (size_t i = 0; i < space.size(); ++i) {
space[i] = new mshadow::TensorContainer<xpu, 1, real_t>();
resource[i].var = Engine::Get()->NewVariable();
resource[i].id = static_cast<int32_t>(i);
resource[i].ptr_ = space[i];
resource[i].req = ResourceRequest(ResourceRequest::kTempSpace);
}
}
~ResourceTempSpace() {
for (size_t i = 0; i < space.size(); ++i) {
mshadow::TensorContainer<xpu, 1, real_t>* r = space[i];
Engine::Get()->DeleteVariable(
[r](RunContext rctx){ delete r; }, ctx, resource[i].var);
}
}
// get the next resource in a round-robin manner
inline Resource GetNext() {
const size_t kMaxDigit = std::numeric_limits<size_t>::max() / 2;
size_t ptr = ++curr_ptr;
// reset the counter before it wraps around
// in practice this rarely happens
if (ptr > kMaxDigit) {
curr_ptr.store((ptr + 1) % space.size());
}
return resource[ptr % space.size()];
}
};
/*! \brief number of copies in CPU temp space */
int cpu_temp_space_copy_;
/*! \brief number of copies in GPU temp space */
int gpu_temp_space_copy_;
/*! \brief Reference to the engine */
std::shared_ptr<Engine> engine_ref_;
/*! \brief internal seed to the random number generator */
uint32_t global_seed_;
/*! \brief CPU random number resources */
ResourceRandom<cpu> *cpu_rand_;
/*! \brief CPU temp space resources */
ResourceTempSpace<cpu> *cpu_space_;
#if MXNET_USE_CUDA
/*! \brief random number generator for GPU */
common::LazyAllocArray<ResourceRandom<gpu> > gpu_rand_;
/*! \brief temp space for GPU */
common::LazyAllocArray<ResourceTempSpace<gpu> > gpu_space_;
#endif
};
} // namespace resource
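For clarity, a self-contained sketch of the round-robin selection pattern that GetNext implements: an atomic counter indexes a fixed pool of copies (sized by MXNET_CPU_TEMP_COPY / MXNET_GPU_TEMP_COPY above) and is reset long before size_t overflow. The class below is an illustrative stand-in, not code from this change:

#include <atomic>
#include <cstddef>
#include <limits>

// Illustrative stand-in for the temp-space pool: hand out slot indices
// in round-robin order from a shared atomic counter.
class RoundRobinPool {
 public:
  explicit RoundRobinPool(std::size_t ncopy) : nslots_(ncopy), curr_ptr_(0) {}
  std::size_t NextIndex() {
    const std::size_t kMax = std::numeric_limits<std::size_t>::max() / 2;
    std::size_t ptr = ++curr_ptr_;
    // Reset well before the counter wraps; a concurrent reset is benign
    // because any stored value still maps to a valid slot index.
    if (ptr > kMax) {
      curr_ptr_.store((ptr + 1) % nslots_);
    }
    return ptr % nslots_;
  }
 private:
  std::size_t nslots_;
  std::atomic<std::size_t> curr_ptr_;
};

For example, a pool constructed with ncopy = 4 yields indices 1, 2, 3, 0, 1, ... across successive calls, so concurrent requests spread over distinct copies of the temp space.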