From 73567e7c7290deb7d4e7a6688f0b9abd1c979305 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 07:38:40 +0200 Subject: [PATCH 01/13] [1.x] Add API to control denormalized computations --- include/mxnet/c_api.h | 7 +++++++ python/mxnet/base.py | 2 ++ python/mxnet/util.py | 11 +++++++++++ src/c_api/c_api.cc | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index b68765eedff8..8796cb5d2989 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -272,6 +272,13 @@ MXNET_DLL int MXRandomSeed(int seed); */ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); +/*! + * \brief Change floating-point calculations when dealing with denormalized values. + * \param value state of flush-to-zero and denormals-are-zero to set. + * \return 0 when success, -1 when failure happens. + */ +MXNET_DLL int MXFTZDenorms(bool value); + /*! * \brief Notify the engine about a shutdown, * This can help engine to print less messages into display. diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 1f9f37d04d88..f22958155ee3 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -311,6 +311,8 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() +check_call(_LIB.MXFTZDenorms(ctypes.c_bool(True))) + # type definitions mx_int = ctypes.c_int mx_uint = ctypes.c_uint diff --git a/python/mxnet/util.py b/python/mxnet/util.py index cafff0f9dd9e..baad69ac2b92 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1200,3 +1200,14 @@ def get_rtc_compile_opts(ctx): arch_opt = "--gpu-architecture={}_{}".format("sm" if should_compile_to_SASS else "compute", device_cc_as_used) return [arch_opt] + +def ftz_denorms(value): + """Change floating-point calculations when dealing with denormalized values. + + Parameters + ---------- + value : bool + State of flush-to-zero and denormals-are-zero in MXCSR register + """ + passed_value = ctypes.c_bool(value) + check_call(_LIB.MXFTZDenorms(passed_value)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index eac1944016df..4f1e2737947f 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1587,6 +1587,42 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } +int MXFTZDenorms(bool value) { + API_BEGIN(); + // FTZ only applies to SSE and AVX instructions. + #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + auto is_dmz_flag_available = []() { + // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 + // "Checking for the DAZ Flag in the MXCSR Register" + constexpr unsigned int mxcsr_mask_offset = 28; + constexpr unsigned int dmz_flag_offset = 5; + constexpr unsigned int fxsave_req_bytes = 512; + + char* fxsave_area_ptr = reinterpret_cast(malloc(fxsave_req_bytes)); + memset(fxsave_area_ptr, 0, fxsave_req_bytes); // fill memory with 0 + _fxsave(fxsave_area_ptr); + + char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset; + uint32_t mxcsr_mask = *(reinterpret_cast((mxcsr_mask_ptr))); + bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1; + free(fxsave_area_ptr); + return dmz_flag; + }; + + const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; + const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + + _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); + // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 + // to it will cause a general-protection exception (#GP) + if (is_dmz_flag_available()) { + _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); + } + #endif + + API_END(); +} + int MXNotifyShutdown() { API_BEGIN(); mxnet::op::custom::CustomOperator::Get()->Stop(); From 232947b7630d3041b7289e61822718ed22a57521 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 12:41:14 +0200 Subject: [PATCH 02/13] Edit name and description --- include/mxnet/c_api.h | 2 +- python/mxnet/base.py | 2 +- python/mxnet/util.py | 10 +++++++--- src/c_api/c_api.cc | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 8796cb5d2989..1c2dc6bd1f8c 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -277,7 +277,7 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); * \param value state of flush-to-zero and denormals-are-zero to set. * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXFTZDenorms(bool value); +MXNET_DLL int MXSetFlushDenorms(bool value); /*! * \brief Notify the engine about a shutdown, diff --git a/python/mxnet/base.py b/python/mxnet/base.py index f22958155ee3..13aaabb76d63 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -311,7 +311,7 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXFTZDenorms(ctypes.c_bool(True))) +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True))) # type definitions mx_int = ctypes.c_int diff --git a/python/mxnet/util.py b/python/mxnet/util.py index baad69ac2b92..0b660e466565 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1201,13 +1201,17 @@ def get_rtc_compile_opts(ctx): device_cc_as_used) return [arch_opt] -def ftz_denorms(value): +def set_flush_denorms(value): """Change floating-point calculations when dealing with denormalized values. - + This is only applicable to architectures which supports flush-to-zero. + Denormalized values are positive and negative values that are very close to 0 + (exponent is the smallest possible value). + Flushing denormalized values to 0 can speedup calculations if such values occurs, + but if IEEE 754 standard is required this option should be disabled. Parameters ---------- value : bool State of flush-to-zero and denormals-are-zero in MXCSR register """ passed_value = ctypes.c_bool(value) - check_call(_LIB.MXFTZDenorms(passed_value)) + check_call(_LIB.MXSetFlushDenorms(passed_value)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4f1e2737947f..9ea72c3f6874 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1587,7 +1587,7 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } -int MXFTZDenorms(bool value) { +int MXSetFlushDenorms(bool value) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) From 1e451f0baaea9d0127f97a586a4dc57b5c10ec79 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 12:47:55 +0200 Subject: [PATCH 03/13] Add direct imports --- src/c_api/c_api.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9ea72c3f6874..8c8165af6785 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -62,6 +62,11 @@ #include "miniz.h" #include "nnvm/pass_functions.h" +#if defined(__x86_64__) || defined(_M_X64) +#include +#include +#endif + using namespace mxnet; // Internal function to get the information From 01ea4b6d2a85b43fc8107dc92bf3910640078d6e Mon Sep 17 00:00:00 2001 From: bgawrych Date: Wed, 9 Jun 2021 15:00:57 +0200 Subject: [PATCH 04/13] Edit description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andrzej Kotłowski --- python/mxnet/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 0b660e466565..df08bf99cc6a 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1207,7 +1207,7 @@ def set_flush_denorms(value): Denormalized values are positive and negative values that are very close to 0 (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, - but if IEEE 754 standard is required this option should be disabled. + but if fulfilling whole IEEE 754 standard is required this option should be disabled. Parameters ---------- value : bool From 07a629423394a7033a2344d4f5e9153f36993b5d Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Mon, 14 Jun 2021 12:11:51 +0200 Subject: [PATCH 05/13] Sanity & review --- python/mxnet/util.py | 2 +- src/c_api/c_api.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index df08bf99cc6a..25be58d26f91 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1211,7 +1211,7 @@ def set_flush_denorms(value): Parameters ---------- value : bool - State of flush-to-zero and denormals-are-zero in MXCSR register + State of flush-to-zero and denormals-are-zero in MXCSR register """ passed_value = ctypes.c_bool(value) check_call(_LIB.MXSetFlushDenorms(passed_value)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 8c8165af6785..ee1291885af9 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1596,7 +1596,7 @@ int MXSetFlushDenorms(bool value) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) - auto is_dmz_flag_available = []() { + std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 // "Checking for the DAZ Flag in the MXCSR Register" constexpr unsigned int mxcsr_mask_offset = 28; @@ -1609,6 +1609,7 @@ int MXSetFlushDenorms(bool value) { char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset; uint32_t mxcsr_mask = *(reinterpret_cast((mxcsr_mask_ptr))); + // DMZ flag is supported if sixth bit of MXCSR_MASK is hot bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1; free(fxsave_area_ptr); return dmz_flag; From 0dedfd8f18a77c2a096d5b6ed9a1d8e74f221d0e Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Tue, 22 Jun 2021 10:59:22 +0200 Subject: [PATCH 06/13] Return previous state of the FTZ flag --- include/mxnet/c_api.h | 3 ++- python/mxnet/base.py | 2 +- python/mxnet/util.py | 4 +++- src/c_api/c_api.cc | 4 +++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 1c2dc6bd1f8c..8a5ca61e9861 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -275,9 +275,10 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); /*! * \brief Change floating-point calculations when dealing with denormalized values. * \param value state of flush-to-zero and denormals-are-zero to set. + * \param prev_state state of flush-to-zero and denormals-are-zero before setting new state. * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXSetFlushDenorms(bool value); +MXNET_DLL int MXSetFlushDenorms(bool value, bool* prev_state); /*! * \brief Notify the engine about a shutdown, diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 13aaabb76d63..f4a9586f9bac 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -311,7 +311,7 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True))) +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), ctypes.byref(ctypes.c_bool()))) # type definitions mx_int = ctypes.c_int diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 25be58d26f91..be755974aa6c 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1213,5 +1213,7 @@ def set_flush_denorms(value): value : bool State of flush-to-zero and denormals-are-zero in MXCSR register """ + ret = ctypes.c_bool() passed_value = ctypes.c_bool(value) - check_call(_LIB.MXSetFlushDenorms(passed_value)) + check_call(_LIB.MXSetFlushDenorms(passed_value, ctypes.byref(ret))) + return ret.value diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index ee1291885af9..75f7c4a620f0 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1592,9 +1592,10 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } -int MXSetFlushDenorms(bool value) { +int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. + *prev_state = false; #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 @@ -1618,6 +1619,7 @@ int MXSetFlushDenorms(bool value) { const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + *prev_state = _MM_GET_FLUSH_ZERO_MODE(); _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 // to it will cause a general-protection exception (#GP) From 78dbc6f4b1d2cde3298fdcd45bf6f2408c2b93c8 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 23 Jun 2021 10:53:54 +0200 Subject: [PATCH 07/13] Utilize Engine::PushSync --- python/mxnet/base.py | 4 ++-- src/c_api/c_api.cc | 28 ++++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/python/mxnet/base.py b/python/mxnet/base.py index f4a9586f9bac..2e8d4b484318 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -311,8 +311,8 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), ctypes.byref(ctypes.c_bool()))) - +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), + ctypes.byref(ctypes.c_bool()))) # type definitions mx_int = ctypes.c_int mx_uint = ctypes.c_uint diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 75f7c4a620f0..a6d54b899172 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1594,8 +1594,9 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); - // FTZ only applies to SSE and AVX instructions. *prev_state = false; + + // FTZ only applies to SSE and AVX instructions. #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 @@ -1616,16 +1617,23 @@ int MXSetFlushDenorms(bool value, bool* prev_state) { return dmz_flag; }; - const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; - const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + Engine::Get()->PushSync( + [value, prev_state, is_dmz_flag_available](RunContext rctx) { + const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; + const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + *prev_state = _MM_GET_FLUSH_ZERO_MODE(); + _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); + + // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 + // to it will cause a general-protection exception (#GP) + if (is_dmz_flag_available()) { + _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); + } + }, Context::CPU(), {}, {}, + FnProperty::kNormal, 0, "SetFlushDenorms"); + + Engine::Get()->WaitForAll(); - *prev_state = _MM_GET_FLUSH_ZERO_MODE(); - _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); - // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 - // to it will cause a general-protection exception (#GP) - if (is_dmz_flag_available()) { - _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); - } #endif API_END(); From 94ac95afc27006b27fa2368e273d3e9ea9e05256 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Fri, 25 Jun 2021 10:42:53 +0200 Subject: [PATCH 08/13] Disable FTZ for numpy_interoperability case --- tests/python/unittest/test_numpy_interoperability.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 1fa7d5284399..34b6c9068ff0 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -3341,7 +3341,11 @@ def test_np_array_function_protocol(): @with_array_ufunc_protocol @pytest.mark.serial def test_np_array_ufunc_protocol(): - check_interoperability(_NUMPY_ARRAY_UFUNC_LIST) + prev_state = util.set_flush_denorms(False) + try: + check_interoperability(_NUMPY_ARRAY_UFUNC_LIST) + finally: + util.set_flush_denorms(prev_state) @use_np From 1d3a0ddf76793e437e28ac58c71c89162517300d Mon Sep 17 00:00:00 2001 From: bgawrych Date: Wed, 23 Jun 2021 15:41:54 +0200 Subject: [PATCH 09/13] Update python/mxnet/util.py Co-authored-by: Sheng Zha --- python/mxnet/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index be755974aa6c..79544015978d 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1202,12 +1202,13 @@ def get_rtc_compile_opts(ctx): return [arch_opt] def set_flush_denorms(value): - """Change floating-point calculations when dealing with denormalized values. + """Change floating-point calculations on CPU when dealing with denormalized values. This is only applicable to architectures which supports flush-to-zero. Denormalized values are positive and negative values that are very close to 0 (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, but if fulfilling whole IEEE 754 standard is required this option should be disabled. + Parameters ---------- value : bool From 3947f69c169883809acd99b76c74841a3b0d54e5 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Fri, 25 Jun 2021 14:48:47 +0200 Subject: [PATCH 10/13] Add required header & fix test --- src/c_api/c_api.cc | 12 +++++++++--- tests/python/unittest/test_numpy_interoperability.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index a6d54b899172..f6fd6347e43f 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -62,9 +62,16 @@ #include "miniz.h" #include "nnvm/pass_functions.h" -#if defined(__x86_64__) || defined(_M_X64) +// FTZ only applies to SSE and AVX instructions. +#define SUPPORT_FTZ_DMZ defined(__SSE__) || \ + defined(__x86_64__) || \ + defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + +#if SUPPORT_FTZ_DMZ #include #include +#include #endif using namespace mxnet; @@ -1596,8 +1603,7 @@ int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); *prev_state = false; - // FTZ only applies to SSE and AVX instructions. - #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + #if SUPPORT_FTZ_DMZ std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 // "Checking for the DAZ Flag in the MXCSR Register" diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 34b6c9068ff0..1b8fe4d132c9 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -25,7 +25,7 @@ import numpy as _np import unittest import pytest -from mxnet import np +from mxnet import np, util from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable From ed10f9250ec5916ab853f7602e0237c63f32caec Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Mon, 28 Jun 2021 09:46:30 +0200 Subject: [PATCH 11/13] Fix macro expansion --- src/c_api/c_api.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f6fd6347e43f..cdd0b6cd41b1 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -63,10 +63,12 @@ #include "nnvm/pass_functions.h" // FTZ only applies to SSE and AVX instructions. -#define SUPPORT_FTZ_DMZ defined(__SSE__) || \ - defined(__x86_64__) || \ - defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 1) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 1) +#define SUPPORT_FTZ_DMZ 1 +#else +#define SUPPORT_FTZ_DMZ 0 +#endif #if SUPPORT_FTZ_DMZ #include From 54fca86fc4967c081d4293aeb4b7c5dd1c4dc8c2 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Mon, 28 Jun 2021 12:46:29 +0200 Subject: [PATCH 12/13] Don't include x86instrin.h when compiling with MSVC --- src/c_api/c_api.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index cdd0b6cd41b1..c54cc0e6f470 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -73,9 +73,12 @@ #if SUPPORT_FTZ_DMZ #include #include +#endif +#if SUPPORT_FTZ_DMZ && !defined(_MSC_VER) #include #endif + using namespace mxnet; // Internal function to get the information From e187f1c16ff137528a9248a0fc3ccbe61563a144 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Fri, 2 Jul 2021 08:31:57 +0200 Subject: [PATCH 13/13] Update documentation --- include/mxnet/c_api.h | 3 +++ python/mxnet/util.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 8a5ca61e9861..977e3e07c96d 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -274,6 +274,9 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); /*! * \brief Change floating-point calculations when dealing with denormalized values. + * Currently this option is only supported in CPU backend. + * Flushing denormalized values to zero is enabled by default. + * * \param value state of flush-to-zero and denormals-are-zero to set. * \param prev_state state of flush-to-zero and denormals-are-zero before setting new state. * \return 0 when success, -1 when failure happens. diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 79544015978d..ea75030614be 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -1208,11 +1208,17 @@ def set_flush_denorms(value): (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, but if fulfilling whole IEEE 754 standard is required this option should be disabled. + Flushing denormalized values is enabled in MXNet by default. Parameters ---------- value : bool State of flush-to-zero and denormals-are-zero in MXCSR register + + Returns + ------- + prev_state : bool + Previous state of flush-to-zero in MXCSR register """ ret = ctypes.c_bool() passed_value = ctypes.c_bool(value)