diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md index 92cf4931b04b..e373377ee8de 100644 --- a/docs/faq/env_var.md +++ b/docs/faq/env_var.md @@ -196,6 +196,12 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca - Flag to enable or disable MKLDNN accelerator. On by default. - Only applies to mxnet that has been compiled with MKLDNN (```pip install mxnet-mkl``` or built from source with ```USE_MKLDNN=1```) +* MXNET_ENFORCE_DETERMINISM + - Values: 0(false) or 1(true) ```(default=0)``` + - If set to true, MXNet will only use deterministic algorithms in forward and backward computation. + If no such algorithm exists given other constraints, MXNet will error out. This variable affects the choice + of cuDNN convolution and deconvolution algorithms and the cuDNN max-pooling mode, and only takes effect when MXNet is built with cuDNN 7 or later. Please see the [cuDNN developer guide](https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html) for more details. + Settings for Minimum Memory Usage --------------------------------- - Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1``` diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index d63d46821edc..3bd6c5a3826b 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -889,6 +889,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. 
+ bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -896,6 +897,9 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index c0c56507bbf3..72ba2c95fc6a 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -829,6 +829,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. 
+ bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -836,6 +837,9 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bc3ee366007c..89fa73ef5471 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,12 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: + #if CUDNN_MAJOR >= 7 + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? + CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; + #else mode_ = CUDNN_POOLING_MAX; + #endif break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {