From 2078132f5e6ad8dbfafe8c1d73a18f75ae08486f Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 07:38:40 +0200 Subject: [PATCH 01/14] [1.x] Add API to control denormalized computations --- include/mxnet/c_api.h | 7 +++++++ python/mxnet/base.py | 2 ++ python/mxnet/util.py | 11 +++++++++++ src/c_api/c_api.cc | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 98a7a7032e5e..edbe50ba8cfa 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -261,6 +261,13 @@ MXNET_DLL int MXRandomSeed(int seed); */ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); +/*! + * \brief Change floating-point calculations when dealing with denormalized values. + * \param value state of flush-to-zero and denormals-are-zero to set. + * \return 0 when success, -1 when failure happens. + */ +MXNET_DLL int MXFTZDenorms(bool value); + /*! * \brief Notify the engine about a shutdown, * This can help engine to print less messages into display. diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 496f1f524893..27d9d275bbf0 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -350,6 +350,8 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() +check_call(_LIB.MXFTZDenorms(ctypes.c_bool(True))) + # type definitions mx_int = ctypes.c_int mx_uint = ctypes.c_uint diff --git a/python/mxnet/util.py b/python/mxnet/util.py index aabd5fe9cdfe..538c82502eb7 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -848,3 +848,14 @@ def setenv(name, value): """ passed_value = None if value is None else c_str(value) check_call(_LIB.MXSetEnv(c_str(name), passed_value)) + +def ftz_denorms(value): + """Change floating-point calculations when dealing with denormalized values. 
+ + Parameters + ---------- + value : bool + State of flush-to-zero and denormals-are-zero in MXCSR register + """ + passed_value = ctypes.c_bool(value) + check_call(_LIB.MXFTZDenorms(passed_value)) \ No newline at end of file diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1cb55835bac7..7e7fb90f953c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1573,6 +1573,42 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } +int MXFTZDenorms(bool value) { + API_BEGIN(); + // FTZ only applies to SSE and AVX instructions. + #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + auto is_dmz_flag_available = []() { + // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 + // "Checking for the DAZ Flag in the MXCSR Register" + constexpr unsigned int mxcsr_mask_offset = 28; + constexpr unsigned int dmz_flag_offset = 5; + constexpr unsigned int fxsave_req_bytes = 512; + + char* fxsave_area_ptr = reinterpret_cast(malloc(fxsave_req_bytes)); + memset(fxsave_area_ptr, 0, fxsave_req_bytes); // fill memory with 0 + _fxsave(fxsave_area_ptr); + + char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset; + uint32_t mxcsr_mask = *(reinterpret_cast((mxcsr_mask_ptr))); + bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1; + free(fxsave_area_ptr); + return dmz_flag; + }; + + const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; + const unsigned int FTZ_STATE = value ? 
_MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + + _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); + // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 + // to it will cause a general-protection exception (#GP) + if (is_dmz_flag_available()) { + _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); + } + #endif + + API_END(); +} + int MXNotifyShutdown() { API_BEGIN(); mxnet::op::custom::CustomOperator::Get()->Stop(); From dd1e6cc15ab1975553eedddaea59858581d4d3c4 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 12:41:14 +0200 Subject: [PATCH 02/14] Edit name and description --- include/mxnet/c_api.h | 2 +- python/mxnet/base.py | 2 +- python/mxnet/util.py | 10 +++++++--- src/c_api/c_api.cc | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index edbe50ba8cfa..f29208696817 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -266,7 +266,7 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); * \param value state of flush-to-zero and denormals-are-zero to set. * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXFTZDenorms(bool value); +MXNET_DLL int MXSetFlushDenorms(bool value); /*! 
* \brief Notify the engine about a shutdown, diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 27d9d275bbf0..7a3409661725 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -350,7 +350,7 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXFTZDenorms(ctypes.c_bool(True))) +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True))) # type definitions mx_int = ctypes.c_int diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 538c82502eb7..447a8d39bc50 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -849,13 +849,17 @@ def setenv(name, value): passed_value = None if value is None else c_str(value) check_call(_LIB.MXSetEnv(c_str(name), passed_value)) -def ftz_denorms(value): +def set_flush_denorms(value): """Change floating-point calculations when dealing with denormalized values. - + This is only applicable to architectures which supports flush-to-zero. + Denormalized values are positive and negative values that are very close to 0 + (exponent is the smallest possible value). + Flushing denormalized values to 0 can speedup calculations if such values occurs, + but if IEEE 754 standard is required this option should be disabled. Parameters ---------- value : bool State of flush-to-zero and denormals-are-zero in MXCSR register """ passed_value = ctypes.c_bool(value) - check_call(_LIB.MXFTZDenorms(passed_value)) \ No newline at end of file + check_call(_LIB.MXSetFlushDenorms(passed_value)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 7e7fb90f953c..27459850953d 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1573,7 +1573,7 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } -int MXFTZDenorms(bool value) { +int MXSetFlushDenorms(bool value) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. 
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) From 9f24b10e422af68f1210f25a1d36c2f8c1e0cdfc Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 9 Jun 2021 12:47:55 +0200 Subject: [PATCH 03/14] Add direct imports --- src/c_api/c_api.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 27459850953d..6022d33ff196 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -59,6 +59,11 @@ #include "../common/utils.h" #include "nnvm/pass_functions.h" +#if defined(__x86_64__) || defined(_M_X64) +#include +#include +#endif + using namespace mxnet; // Internal function to get the information From 84cc6859627619d486316ad6735fcc75c82a92d6 Mon Sep 17 00:00:00 2001 From: bgawrych Date: Wed, 9 Jun 2021 15:00:57 +0200 Subject: [PATCH 04/14] Edit description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Andrzej Kotłowski --- python/mxnet/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 447a8d39bc50..068d97573aa2 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -855,7 +855,7 @@ def set_flush_denorms(value): Denormalized values are positive and negative values that are very close to 0 (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, - but if IEEE 754 standard is required this option should be disabled. + but if fulfilling whole IEEE 754 standard is required this option should be disabled. Parameters ---------- value : bool From 5140edd97ef2c26b98f19ca771e400cd5fada32e Mon Sep 17 00:00:00 2001 From: "B. 
Gawrych" Date: Mon, 14 Jun 2021 12:11:51 +0200 Subject: [PATCH 05/14] Sanity & review --- python/mxnet/util.py | 2 +- src/c_api/c_api.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 068d97573aa2..d687b5bddcb9 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -859,7 +859,7 @@ def set_flush_denorms(value): Parameters ---------- value : bool - State of flush-to-zero and denormals-are-zero in MXCSR register + State of flush-to-zero and denormals-are-zero in MXCSR register """ passed_value = ctypes.c_bool(value) check_call(_LIB.MXSetFlushDenorms(passed_value)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 6022d33ff196..5431d6f173ae 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1582,7 +1582,7 @@ int MXSetFlushDenorms(bool value) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) - auto is_dmz_flag_available = []() { + std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 // "Checking for the DAZ Flag in the MXCSR Register" constexpr unsigned int mxcsr_mask_offset = 28; @@ -1595,6 +1595,7 @@ int MXSetFlushDenorms(bool value) { char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset; uint32_t mxcsr_mask = *(reinterpret_cast((mxcsr_mask_ptr))); + // DMZ flag is supported if sixth bit of MXCSR_MASK is hot bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1; free(fxsave_area_ptr); return dmz_flag; From 2e8dd20699de4157eb226701b5d39de60b0b526c Mon Sep 17 00:00:00 2001 From: "B. 
Gawrych" Date: Tue, 22 Jun 2021 10:59:22 +0200 Subject: [PATCH 06/14] Return previous state of the FTZ flag --- include/mxnet/c_api.h | 3 ++- python/mxnet/base.py | 2 +- python/mxnet/util.py | 4 +++- src/c_api/c_api.cc | 4 +++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index f29208696817..0613a7fcc019 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -264,9 +264,10 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); /*! * \brief Change floating-point calculations when dealing with denormalized values. * \param value state of flush-to-zero and denormals-are-zero to set. + * \param prev_state state of flush-to-zero and denormals-are-zero before setting new state. * \return 0 when success, -1 when failure happens. */ -MXNET_DLL int MXSetFlushDenorms(bool value); +MXNET_DLL int MXSetFlushDenorms(bool value, bool* prev_state); /*! * \brief Notify the engine about a shutdown, diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 7a3409661725..b0da85729d94 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -350,7 +350,7 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True))) +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), ctypes.byref(ctypes.c_bool()))) # type definitions mx_int = ctypes.c_int diff --git a/python/mxnet/util.py b/python/mxnet/util.py index d687b5bddcb9..5f9043173d24 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -861,5 +861,7 @@ def set_flush_denorms(value): value : bool State of flush-to-zero and denormals-are-zero in MXCSR register """ + ret = ctypes.c_bool() passed_value = ctypes.c_bool(value) - check_call(_LIB.MXSetFlushDenorms(passed_value)) + check_call(_LIB.MXSetFlushDenorms(passed_value, ctypes.byref(ret))) + return ret.value diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5431d6f173ae..f6fd825aaa0f 100644 --- 
a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1578,9 +1578,10 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { API_END(); } -int MXSetFlushDenorms(bool value) { +int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); // FTZ only applies to SSE and AVX instructions. + *prev_state = false; #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 @@ -1604,6 +1605,7 @@ int MXSetFlushDenorms(bool value) { const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + *prev_state = _MM_GET_FLUSH_ZERO_MODE(); _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 // to it will cause a general-protection exception (#GP) From 3b6069ef3d7bf4a4c99bfdbe9863687cccb64a9f Mon Sep 17 00:00:00 2001 From: "B. 
Gawrych" Date: Wed, 23 Jun 2021 10:53:54 +0200 Subject: [PATCH 07/14] Utilize Engine::PushSync --- python/mxnet/base.py | 4 ++-- src/c_api/c_api.cc | 28 ++++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/python/mxnet/base.py b/python/mxnet/base.py index b0da85729d94..ca98116e5891 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -350,8 +350,8 @@ def _load_lib(): # library instance of mxnet _LIB = _load_lib() -check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), ctypes.byref(ctypes.c_bool()))) - +check_call(_LIB.MXSetFlushDenorms(ctypes.c_bool(True), + ctypes.byref(ctypes.c_bool()))) # type definitions mx_int = ctypes.c_int mx_uint = ctypes.c_uint diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f6fd825aaa0f..d9090dce3ff6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1580,8 +1580,9 @@ int MXRandomSeedContext(int seed, int dev_type, int dev_id) { int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); - // FTZ only applies to SSE and AVX instructions. *prev_state = false; + + // FTZ only applies to SSE and AVX instructions. #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 @@ -1602,16 +1603,23 @@ int MXSetFlushDenorms(bool value, bool* prev_state) { return dmz_flag; }; - const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; - const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + Engine::Get()->PushSync( + [value, prev_state, is_dmz_flag_available](RunContext rctx) { + const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; + const unsigned int FTZ_STATE = value ? 
_MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; + *prev_state = _MM_GET_FLUSH_ZERO_MODE(); + _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); + + // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 + // to it will cause a general-protection exception (#GP) + if (is_dmz_flag_available()) { + _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); + } + }, Context::CPU(), {}, {}, + FnProperty::kNormal, 0, "SetFlushDenorms"); + + Engine::Get()->WaitForAll(); - *prev_state = _MM_GET_FLUSH_ZERO_MODE(); - _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE); - // If the DAZ flag is not supported, then it is a reserved bit and attempting to write a 1 - // to it will cause a general-protection exception (#GP) - if (is_dmz_flag_available()) { - _MM_SET_DENORMALS_ZERO_MODE(DMZ_STATE); - } #endif API_END(); From 1fe2fe266bac0d7e41df18ab1c40e83db92c2cd7 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Wed, 23 Jun 2021 10:54:18 +0200 Subject: [PATCH 08/14] Disable FTZ for numpy_interoperability case --- tests/python/unittest/test_numpy_interoperability.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 63e74c746d5b..4250a098b807 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -22,9 +22,11 @@ import sys import platform import itertools -import numpy as _np import unittest + from mxnet import np +from mxnet import util +import numpy as _np from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable @@ -3008,6 +3010,7 @@ def _check_interoperability_helper(op_name, rel_tol, abs_tol, *args, **kwargs): assert False if not is_op_runnable(): return + out = onp_op(*args, **kwargs) expected_out = _get_numpy_op_output(onp_op, *args, **kwargs) if isinstance(out, (tuple, list)): @@ -3077,7 +3080,11 @@ def 
test_np_array_function_protocol(): @use_np @with_array_ufunc_protocol def test_np_array_ufunc_protocol(): - check_interoperability(_NUMPY_ARRAY_UFUNC_LIST) + prev_state = util.set_flush_denorms(False) + try: + check_interoperability(_NUMPY_ARRAY_UFUNC_LIST) + finally: + util.set_flush_denorms(prev_state) @with_seed() From 46d5998c05036b332eaf57d09aec648a07da52f4 Mon Sep 17 00:00:00 2001 From: bgawrych Date: Wed, 23 Jun 2021 15:41:54 +0200 Subject: [PATCH 09/14] Update python/mxnet/util.py Co-authored-by: Sheng Zha --- python/mxnet/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 5f9043173d24..ce69807216d3 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -850,12 +850,13 @@ def setenv(name, value): check_call(_LIB.MXSetEnv(c_str(name), passed_value)) def set_flush_denorms(value): - """Change floating-point calculations when dealing with denormalized values. + """Change floating-point calculations on CPU when dealing with denormalized values. This is only applicable to architectures which supports flush-to-zero. Denormalized values are positive and negative values that are very close to 0 (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, but if fulfilling whole IEEE 754 standard is required this option should be disabled. + Parameters ---------- value : bool From 1c7e080654cd87a325a651f95eda1a47242b35c5 Mon Sep 17 00:00:00 2001 From: "B. 
Gawrych" Date: Fri, 25 Jun 2021 14:48:47 +0200 Subject: [PATCH 10/14] Add required header & fix test --- src/c_api/c_api.cc | 12 +++++++++--- tests/python/unittest/test_numpy_interoperability.py | 3 +-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index d9090dce3ff6..94bb708e4be7 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -59,9 +59,16 @@ #include "../common/utils.h" #include "nnvm/pass_functions.h" -#if defined(__x86_64__) || defined(_M_X64) +// FTZ only applies to SSE and AVX instructions. +#define SUPPORT_FTZ_DMZ defined(__SSE__) || \ + defined(__x86_64__) || \ + defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + +#if SUPPORT_FTZ_DMZ #include #include +#include #endif using namespace mxnet; @@ -1582,8 +1589,7 @@ int MXSetFlushDenorms(bool value, bool* prev_state) { API_BEGIN(); *prev_state = false; - // FTZ only applies to SSE and AVX instructions. - #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + #if SUPPORT_FTZ_DMZ std::function is_dmz_flag_available = []() { // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1 // "Checking for the DAZ Flag in the MXCSR Register" diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 4250a098b807..e712adb7b426 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -24,8 +24,7 @@ import itertools import unittest -from mxnet import np -from mxnet import util +from mxnet import np, util import numpy as _np from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np From 7d923ca42c00bf6cbb37787dc55821b57df459ed Mon Sep 17 00:00:00 2001 From: "B. 
Gawrych" Date: Mon, 28 Jun 2021 09:46:30 +0200 Subject: [PATCH 11/14] Fix macro expansion --- src/c_api/c_api.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 94bb708e4be7..54f03db0626e 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -60,10 +60,12 @@ #include "nnvm/pass_functions.h" // FTZ only applies to SSE and AVX instructions. -#define SUPPORT_FTZ_DMZ defined(__SSE__) || \ - defined(__x86_64__) || \ - defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 1) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 1) +#define SUPPORT_FTZ_DMZ 1 +#else +#define SUPPORT_FTZ_DMZ 0 +#endif #if SUPPORT_FTZ_DMZ #include From c0f061a054d623a1e22426b9bede887cf726d26d Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Mon, 28 Jun 2021 12:46:29 +0200 Subject: [PATCH 12/14] Don't include x86intrin.h when compiling with MSVC --- src/c_api/c_api.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 54f03db0626e..bcbdab10b361 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -70,9 +70,12 @@ #if SUPPORT_FTZ_DMZ #include #include +#endif +#if SUPPORT_FTZ_DMZ && !defined(_MSC_VER) #include #endif + using namespace mxnet; // Internal function to get the information From 8455a63da46bd28ab5712bd898c21294dd527d14 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Fri, 2 Jul 2021 08:31:57 +0200 Subject: [PATCH 13/14] Update documentation --- include/mxnet/c_api.h | 3 +++ python/mxnet/util.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 0613a7fcc019..6e441e20f56f 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -263,6 +263,9 @@ MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id); /*! * \brief Change floating-point calculations when dealing with denormalized values. 
+ * Currently this option is only supported in CPU backend. + * Flushing denormalized values to zero is enabled by default. + * * \param value state of flush-to-zero and denormals-are-zero to set. * \param prev_state state of flush-to-zero and denormals-are-zero before setting new state. * \return 0 when success, -1 when failure happens. diff --git a/python/mxnet/util.py b/python/mxnet/util.py index ce69807216d3..d0584b3555e1 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -856,11 +856,17 @@ def set_flush_denorms(value): (exponent is the smallest possible value). Flushing denormalized values to 0 can speedup calculations if such values occurs, but if fulfilling whole IEEE 754 standard is required this option should be disabled. + Flushing denormalized values is enabled in MXNet by default. Parameters ---------- value : bool State of flush-to-zero and denormals-are-zero in MXCSR register + + Returns + ------- + prev_state : bool + Previous state of flush-to-zero in MXCSR register """ ret = ctypes.c_bool() passed_value = ctypes.c_bool(value) From 2fd6425bf6d843c8a483cc03d5b936529f0619d2 Mon Sep 17 00:00:00 2001 From: "B. Gawrych" Date: Fri, 16 Jul 2021 11:08:00 +0200 Subject: [PATCH 14/14] Remove added empty line --- tests/python/unittest/test_numpy_interoperability.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index e712adb7b426..7c3081ba30db 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -3009,7 +3009,6 @@ def _check_interoperability_helper(op_name, rel_tol, abs_tol, *args, **kwargs): assert False if not is_op_runnable(): return - out = onp_op(*args, **kwargs) expected_out = _get_numpy_op_output(onp_op, *args, **kwargs) if isinstance(out, (tuple, list)):