From 4dbbe06753ac665cab6a717b97a95c9c3e77c2ed Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Thu, 23 May 2019 16:46:18 -0700 Subject: [PATCH 1/3] GPU RNN to use TempSpace resource for workspace. --- src/operator/rnn-inl.h | 44 +++++++++++++++++++++++++----------------- src/operator/rnn.cc | 35 ++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index e43b3c9b5131..98b835a9c3b2 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -528,7 +528,6 @@ class RNNOp { CUDNN_CALL(cudnnDestroyTensorDescriptor(dy_desc_vec_[i])); } init_cudnn_ = false; - Storage::Get()->Free(temp_space_); Storage::Get()->Free(reserve_space_); } #if MXNET_USE_CUDNN_GE_7200 @@ -656,6 +655,12 @@ class RNNOp { Init(ctx, s, in_data, out_data); } + // Get temp space + int temp_size = workspace_size_; + Tensor temp_space = + ctx.requested[rnn_enum::kTempSpace].get_space_typed( + mshadow::Shape1(temp_size), s); + #if MXNET_USE_CUDNN_GE_7200 cudnnRNNDataLayout_t layout_t; @@ -749,7 +754,7 @@ class RNNOp { nullptr, nullptr, nullptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, reserve_space_.dptr, reserve_space_byte_)); @@ -771,7 +776,7 @@ class RNNOp { hy_ptr, cy_desc_, cy_ptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, reserve_space_.dptr, reserve_space_byte_)); @@ -802,7 +807,7 @@ class RNNOp { nullptr, nullptr, nullptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_)); #else CUDNN_CALL(cudnnRNNForwardInference(s->dnn_handle_, @@ -822,7 +827,7 @@ class RNNOp { hy_ptr, cy_desc_, cy_ptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_)); #endif } @@ -984,6 +989,12 @@ class RNNOp { Init(ctx, s, in_data, out_data); } + // Get temp space + int temp_size = workspace_size_; + Tensor temp_space = + ctx.requested[rnn_enum::kTempSpace].get_space_typed( + mshadow::Shape1(temp_size), s); + #if MXNET_USE_CUDNN_GE_7200 CUDNN_CALL(cudnnRNNBackwardDataEx(s->dnn_handle_, rnn_desc_, @@ -1011,7 +1022,7 @@ class RNNOp { dcx_ptr, nullptr, nullptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, reserve_space_.dptr, reserve_space_byte_)); @@ -1023,7 +1034,7 @@ class RNNOp { hx.dptr_, y_data_desc_, y.dptr_, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, dw_desc_, dw.dptr_, @@ -1053,7 +1064,7 @@ class RNNOp { dhx.dptr_, dcx_desc_, dcx_ptr, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, reserve_space_.dptr, reserve_space_byte_)); @@ -1066,7 +1077,7 @@ class RNNOp { hx.dptr_, y_desc_vec_.data(), y.dptr_, - temp_space_.dptr, + temp_space.dptr_, workspace_byte_, dw_desc_, dw.dptr_, @@ -1301,17 +1312,16 @@ class RNNOp { strideA)); // Create Dropout descriptors - DType* dropout_states_ = NULL; if (param_.p > 0) { ctx.requested[rnn_enum::kCuDNNDropoutDescSpace].get_cudnn_dropout_desc (&dropout_desc_, s, 1.0f - param_.p, seed_); - } else { - dropout_byte_ = 0; } - + // Only update the probability by passing in a null dropout_states ptr + DType* dropout_states = NULL; + size_t dropout_bytes = 0; CUDNN_CALL(cudnnSetDropoutDescriptor(dropout_desc_, s->dnn_handle_, param_.p, // discard probability - dropout_states_, dropout_byte_, + dropout_states, dropout_bytes, seed_)); // RNN descriptors @@ -1392,8 +1402,6 @@ class RNNOp { workspace_size_ = workspace_byte_ / sizeof(DType); // Allocate the reserve space reserve_space_ = Storage::Get()->Alloc(reserve_space_byte_, Context::GPU(s->dev_id)); - // Allocate the temp space - temp_space_ = Storage::Get()->Alloc(workspace_byte_, Context::GPU(s->dev_id)); // Check that number of params are correct size_t cudnn_param_size; CUDNN_CALL(cudnnGetRNNParamsSize(s->dnn_handle_, @@ -1462,9 +1470,9 @@ class RNNOp { cudnnDirectionMode_t direction_; cudnnRNNInputMode_t input_mode_; cudnnDropoutDescriptor_t dropout_desc_; - Storage::Handle reserve_space_, temp_space_; + Storage::Handle reserve_space_; uint64_t seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) - size_t workspace_byte_, reserve_space_byte_, dropout_byte_; + size_t workspace_byte_, reserve_space_byte_; int workspace_size_; std::vector x_desc_vec_, y_desc_vec_, dx_desc_vec_, dy_desc_vec_; #if MXNET_USE_CUDNN_GE_7200 diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 9b412a2575a1..df19fdff94f4 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -167,6 +167,22 @@ static bool RNNType(const nnvm::NodeAttrs& attrs, return true; } +static std::vector RNNResourceEx(const NodeAttrs& attrs, const int dev_mask, + const DispatchMode dispatch_mode) { + std::vector request; + if (dev_mask == kGPU) { +#if MXNET_USE_CUDNN_RNN + request.emplace_back(ResourceRequest::kTempSpace); + + const RNNParam& param = nnvm::get(attrs.parsed); + if (param.p != 0 && 1.0f - param.p > 0) { + request.emplace_back(ResourceRequest::kCuDNNDropoutDesc); + } +#endif + } + return request; +} + struct RNNGrad { const char *op_name; std::vector operator()(const nnvm::NodePtr &n, @@ -272,21 +288,7 @@ The definition of GRU here is slightly different from paper but compatible with .set_attr("FCreateOpState", CreateRNNState) .set_attr("FStatefulCompute", RNNStatefulCompute) .set_attr("FGradient", RNNGrad{"_backward_RNN"}) -.set_attr("FResourceRequestEx", - [](const NodeAttrs& attrs, const int dev_mask, const DispatchMode dispatch_mode) { - std::vector request; - if (dev_mask == kGPU) { -#if MXNET_USE_CUDNN_RNN - request.emplace_back(ResourceRequest::kTempSpace); - - const RNNParam& param = nnvm::get(attrs.parsed); - if (param.p != 0 && 1.0f - param.p > 0) { - request.emplace_back(ResourceRequest::kCuDNNDropoutDesc); - } -#endif - } - return request; -}) +.set_attr("FResourceRequestEx", RNNResourceEx) .add_argument("data", "NDArray-or-Symbol", "Input data to RNN") .add_argument("parameters", "NDArray-or-Symbol", "Vector of all RNN trainable parameters concatenated") @@ -306,6 +308,7 @@ NNVM_REGISTER_OP(_backward_RNN) .set_attr_parser(ParamParser) .set_attr("TIsLayerOpBackward", true) .set_attr("TIsBackward", true) -.set_attr("FStatefulCompute", RNNStatefulGradCompute); +.set_attr("FStatefulCompute", RNNStatefulGradCompute) +.set_attr("FResourceRequestEx", RNNResourceEx); } // namespace op } // namespace mxnet From a4de5aa42a9ce21595591366d2e72db99307c6eb Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 24 May 2019 15:40:14 -0700 Subject: [PATCH 2/3] Trigger CI. From cd96afc5ea9849c38aad9c07559d90b5a36d0a4d Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 24 May 2019 17:06:56 -0700 Subject: [PATCH 3/3] Fix syntax error after merge. --- src/operator/rnn.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 9f0b586f68a6..6a0dbd7a4e23 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -181,6 +181,7 @@ static std::vector RNNResourceEx(const NodeAttrs& attrs, const #endif } return request; +} inline static bool RNNStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,