Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
6 changes: 6 additions & 0 deletions docs/faq/env_var.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca
- Flag to enable or disable the MKLDNN accelerator. On by default.
- Only applies to mxnet that has been compiled with MKLDNN (```pip install mxnet-mkl``` or built from source with ```USE_MKLDNN=1```)

* MXNET_ENFORCE_DETERMINISM
- Values: 0 (false) or 1 (true) ```(default=0)```
- If set to true, MXNet will use only deterministic algorithms in forward and backward computation.
If no such algorithm exists given the other constraints, MXNet will raise an error. This variable affects the choice
of cuDNN convolution algorithms. Please see the [cuDNN Developer Guide](https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html) for more details.

Settings for Minimum Memory Usage
---------------------------------
- Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1```
Expand Down
4 changes: 4 additions & 0 deletions src/operator/nn/cudnn/cudnn_convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -889,13 +889,17 @@ class CuDNNConvolutionOp {
size_t workspace_byte, CuDNNAlgo<AlgoType> *algo) {
// Determine the fastest acceptable algo that matches the algo_preference (-1 = any),
// regardless of mathType.
bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false);
for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) {
const auto &result = perf_results[i];
bool algo_is_tensor_core = false;
#if CUDNN_MAJOR >= 7
algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH;
#endif
if (result.status == CUDNN_STATUS_SUCCESS &&
#if CUDNN_MAJOR >= 7
(!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) &&
#endif
(param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) {
algo->Set(result.algo, algo_is_tensor_core);
return;
Expand Down
4 changes: 4 additions & 0 deletions src/operator/nn/cudnn/cudnn_deconvolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -829,13 +829,17 @@ class CuDNNDeconvolutionOp {
void AlgoFinalSelect(const std::vector<PerfType> &perf_results, std::string kernel_name,
size_t workspace_byte, CuDNNAlgo<AlgoType> *algo) {
// Determine the fastest acceptable algo regardless of mathType.
bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false);
for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) {
const auto &result = perf_results[i];
bool algo_is_tensor_core = false;
#if CUDNN_MAJOR >= 7
algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH;
#endif
if (result.status == CUDNN_STATUS_SUCCESS &&
#if CUDNN_MAJOR >= 7
(!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) &&
#endif
(param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) {
algo->Set(result.algo, algo_is_tensor_core);
return;
Expand Down
5 changes: 5 additions & 0 deletions src/operator/nn/cudnn/cudnn_pooling-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ class CuDNNPoolingOp {
param_ = p;
switch (param_.pool_type) {
case pool_enum::kMaxPooling:
#if CUDNN_MAJOR >= 7
mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ?
CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX;
#else
mode_ = CUDNN_POOLING_MAX;
#endif
break;
case pool_enum::kAvgPooling:
if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {
Expand Down