From 3a2dcc2534f7e61b41e674d47b34c226e53d2d93 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 26 Oct 2018 15:14:10 -0700 Subject: [PATCH 1/7] add env variable to choose deterministic cudnn alg --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index acdd64976651..ab57a3668798 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -854,6 +854,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -861,6 +862,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 041bea66f7bf..186c417b044c 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -800,6 +800,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -807,6 +808,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bc3ee366007c..bb3e9c7e3342 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,8 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: - mode_ = CUDNN_POOLING_MAX; + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0) ? + CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) { From 2c56d7f24c4b5693b0fec226dfb2d3361b9fda61 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 13:34:09 -0700 Subject: [PATCH 2/7] set default value to false --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_pooling-inl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index ab57a3668798..c7bf0996e3c1 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -854,7 +854,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. - bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 186c417b044c..b1b37dc8b134 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -800,7 +800,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. - bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bb3e9c7e3342..29192b197ab3 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,7 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: - mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0) ? + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: From ec18ebc274a67fbc1ff42d6055b9724c82d75bf7 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:10:49 -0700 Subject: [PATCH 3/7] fix build failure in Windows GPU --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index c7bf0996e3c1..10be8722b162 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -862,7 +862,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index b1b37dc8b134..b80682471977 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -808,7 +808,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From 2a15893f2972c6ea1fe1852e77895676c695d7ec Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:16:55 -0700 Subject: [PATCH 4/7] revert the previous change --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 10be8722b162..c7bf0996e3c1 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -862,7 +862,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index b80682471977..b1b37dc8b134 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -808,7 +808,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From a1ff7141a87a10a85d497f80a8c6c4c94776a193 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:41:46 -0700 Subject: [PATCH 5/7] only check determinism in CUDNN 7.x release --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index c7bf0996e3c1..39021243ff18 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -862,7 +862,9 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index b1b37dc8b134..22fded712fe5 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -808,7 +808,9 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From 89af0d291448461259a0c9e3b124664e228f2ae5 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 16:17:14 -0700 Subject: [PATCH 6/7] Add cudnn version check --- src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index 29192b197ab3..5c88bd929ae4 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,8 +48,11 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: + #if CUDNN_MAJOR >= 7 mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; + #else + mode_ = CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) { From a28c285afa23a66f6e16d922d10ef8c388f4ee62 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 16:36:35 -0700 Subject: [PATCH 7/7] fix lint error --- src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index 5c88bd929ae4..89fa73ef5471 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -52,7 +52,8 @@ class CuDNNPoolingOp { mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; #else - mode_ = CUDNN_POOLING_MAX; + mode_ = CUDNN_POOLING_MAX; + #endif break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {