From d8c94ea131997ab9a5fb32f7d0e371851c617140 Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Tue, 12 Nov 2019 10:33:05 -0800
Subject: [PATCH 1/6] Support bool in fusion

---
 src/operator/fusion/fused_op.cu | 10 +++++++++-
 src/operator/fusion/fused_op.h  |  3 ++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/operator/fusion/fused_op.cu b/src/operator/fusion/fused_op.cu
index 78988f13510e..f854e3b78cdc 100644
--- a/src/operator/fusion/fused_op.cu
+++ b/src/operator/fusion/fused_op.cu
@@ -50,6 +50,8 @@ inline std::string mshadowTypeToString(int type) {
       return "int";
     case mshadow::kInt64:
       return "long long";
+    case mshadow::kBool:
+      return "bool";
     default:
       LOG(FATAL) << "Unknown type enum " << type;
   }
@@ -72,6 +74,8 @@ inline int mshadowTypeToVectorLength(int type) {
       return 1;
     case mshadow::kInt64:
       return 1;
+    case mshadow::kBool:
+      return 4 / sizeof(bool);
     default:
       LOG(FATAL) << "Unknown type enum " << type;
   }
@@ -156,7 +160,7 @@ void AddPointerAndShape(const TBlob& data,
                         std::vector<std::vector<int>>* shapes,
                         mshadow::Stream<gpu> * s) {
   using namespace mshadow;
-  MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
+  MSHADOW_TYPE_SWITCH_WITH_BOOL(data.type_flag_, DType, {
     Tensor<gpu, 1, DType> tensor = data.FlatTo1D<gpu, DType>(s);
     ptrs->push_back(tensor.dptr_);
     AddShape(data.shape_, shapes);
@@ -647,7 +651,9 @@ void FusedOp::CheckShapesAndTypes(const std::vector<TBlob> &inputs,
     in_ndims->push_back(blob.ndim());
     in_shapes.push_back(blob.shape_);
     initialized_ = initialized_ && blob.type_flag_ == inputs_[counter].dtype;
+    initialized_ = initialized_ && blob.ndim() == inputs_[counter].ndim;
     inputs_[counter].dtype = blob.type_flag_;
+    inputs_[counter].ndim = blob.ndim();
     *nvec = max(*nvec, mshadowTypeToVectorLength(blob.type_flag_));
   }
 
@@ -657,7 +663,9 @@ void FusedOp::CheckShapesAndTypes(const std::vector<TBlob> &inputs,
     out_ndims->push_back(blob.ndim());
     out_shapes.push_back(blob.shape_);
     initialized_ = initialized_ && blob.type_flag_ == outputs_[counter].dtype;
+    initialized_ = initialized_ && blob.ndim() == outputs_[counter].ndim;
     outputs_[counter].dtype = blob.type_flag_;
+    outputs_[counter].ndim = blob.ndim();
     *nvec = max(*nvec, mshadowTypeToVectorLength(blob.type_flag_));
   }
 
diff --git a/src/operator/fusion/fused_op.h b/src/operator/fusion/fused_op.h
index 24603ac1932f..43491f7af47a 100644
--- a/src/operator/fusion/fused_op.h
+++ b/src/operator/fusion/fused_op.h
@@ -52,8 +52,9 @@ struct FusedOpConfig : public dmlc::Parameter<FusedOpConfig> {
 };
 
 struct FusedOpEntry {
-  FusedOpEntry() : dtype(-1) {}
+  FusedOpEntry() : dtype(-1), ndim(-1) {}
   int dtype;
+  int ndim;
 };
 
 class FusedOp {

From 41e2a87190f7fb1fe00d055e35c89a8a48a3fe83 Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Tue, 12 Nov 2019 15:19:24 -0800
Subject: [PATCH 2/6] Added tests

---
 tests/python/gpu/test_fusion.py | 43 +++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index 5606eb19a9c5..f3aed8b54427 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -238,6 +238,49 @@ def test_fusion_compiler_cache():
     if num_gpus > 1:
         check_fused_symbol(a+b, ctx=mx.gpu(1), a=arr1, b=arr2)
 
+@with_seed()
+def test_fusion_boolean_inputs():
+    from mxnet.gluon import HybridBlock
+    mx.npx.set_np()
+
+    class Foo(HybridBlock):
+        def __init__(self, prefix=None, params=None):
+            super(Foo, self).__init__(prefix=prefix, params=params)
+
+        def hybrid_forward(self, F, valid_length):
+            mask = (F.np.ones((10,)) < valid_length).astype(np.float32)
+            mask2 = (F.np.ones((10,)) < valid_length).astype(np.float32)
+            mask = mask * F.np.expand_dims(mask2, axis=-1)
+            return mask
+
+    foo = Foo()
+    foo.hybridize(static_alloc=True)
+    out = foo(mx.np.ones((10,), ctx=mx.gpu()))
+    mx.npx.reset_np()
+
+@with_seed()
+def test_fusion_different_dimensions():
+    from mxnet.gluon import HybridBlock
+
+    class Foo(HybridBlock):
+        def __init__(self, prefix=None, params=None):
+            super(Foo, self).__init__(prefix=prefix, params=params)
+
+        def hybrid_forward(self, F, x):
+            mask2 = x.astype(np.float32)
+            mask = F.expand_dims(mask2, axis=-1)
+            return mask
+
+    foo = Foo()
+    foo.hybridize(static_alloc=True)
+    # Pass 1-D data
+    out = foo(mx.nd.ones((10,), ctx=mx.gpu()))
+    assert np.all(out.asnumpy() == np.ones((10,1)))
+    assert out.shape == (10,1)
+    # Pass 2-D data
+    out = foo(mx.nd.ones((10,10), ctx=mx.gpu()))
+    assert np.all(out.asnumpy() == np.ones((10,10)))
+    assert out.shape == (10,10,1)
 
 if __name__ == '__main__':
     import nose

From 4e39f834f7db75b4fcdff5ab47f49896837ae4be Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Wed, 20 Nov 2019 15:02:10 -0800
Subject: [PATCH 3/6] Use use_np decorator vs calling the numpy convention by hand

---
 tests/python/gpu/test_fusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index f3aed8b54427..a26d7e518307 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -239,9 +239,9 @@ def test_fusion_compiler_cache():
         check_fused_symbol(a+b, ctx=mx.gpu(1), a=arr1, b=arr2)
 
 @with_seed()
+@use_np
 def test_fusion_boolean_inputs():
     from mxnet.gluon import HybridBlock
-    mx.npx.set_np()
 
     class Foo(HybridBlock):
         def __init__(self, prefix=None, params=None):
@@ -256,7 +256,7 @@ def hybrid_forward(self, F, valid_length):
     foo = Foo()
     foo.hybridize(static_alloc=True)
     out = foo(mx.np.ones((10,), ctx=mx.gpu()))
-    mx.npx.reset_np()
+    mx.nd.waitall()
 
 @with_seed()
 def test_fusion_different_dimensions():

From 6e628404d3a04108797eb57757c286cd4ce33046 Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Thu, 21 Nov 2019 14:03:45 -0800
Subject: [PATCH 4/6] Debug TVM failure

---
 tests/python/gpu/test_fusion.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index a26d7e518307..07a8e7ef884a 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -238,9 +238,29 @@ def test_fusion_compiler_cache():
     if num_gpus > 1:
         check_fused_symbol(a+b, ctx=mx.gpu(1), a=arr1, b=arr2)
 
+# @with_seed()
+# @use_np
+# def test_fusion_boolean_inputs():
+    # from mxnet.gluon import HybridBlock
+
+    # class Foo(HybridBlock):
+        # def __init__(self, prefix=None, params=None):
+            # super(Foo, self).__init__(prefix=prefix, params=params)
+
+        # def hybrid_forward(self, F, valid_length):
+            # mask = (F.np.ones((10,)) < valid_length).astype(np.float32)
+            # mask2 = (F.np.ones((10,)) < valid_length).astype(np.float32)
+            # mask = mask * F.np.expand_dims(mask2, axis=-1)
+            # return mask
+
+    # foo = Foo()
+    # foo.hybridize(static_alloc=True)
+    # out = foo(mx.np.ones((10,), ctx=mx.gpu()))
+    # mx.nd.waitall()
+
 @with_seed()
 @use_np
-def test_fusion_boolean_inputs():
+def test_fusion_boolean_inputs_debug_tvm():
     from mxnet.gluon import HybridBlock
 
     class Foo(HybridBlock):
@@ -248,15 +268,13 @@ def __init__(self, prefix=None, params=None):
             super(Foo, self).__init__(prefix=prefix, params=params)
 
         def hybrid_forward(self, F, valid_length):
-            mask = (F.np.ones((10,)) < valid_length).astype(np.float32)
-            mask2 = (F.np.ones((10,)) < valid_length).astype(np.float32)
-            mask = mask * F.np.expand_dims(mask2, axis=-1)
-            return mask
+            mask = (F.np.ones((10,)) < valid_length)
+            mx.npx.waitall()
 
     foo = Foo()
     foo.hybridize(static_alloc=True)
     out = foo(mx.np.ones((10,), ctx=mx.gpu()))
-    mx.nd.waitall()
+    mx.npx.waitall()
 
 @with_seed()
 def test_fusion_different_dimensions():

From ebf65fec98becf3efec061cb5fc9566626e56856 Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Thu, 21 Nov 2019 15:28:56 -0800
Subject: [PATCH 5/6] Fix

---
 tests/python/gpu/test_fusion.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index 07a8e7ef884a..c0e6eb36152e 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -269,6 +269,7 @@ def __init__(self, prefix=None, params=None):
 
         def hybrid_forward(self, F, valid_length):
             mask = (F.np.ones((10,)) < valid_length)
+            return mask
             mx.npx.waitall()
 
     foo = Foo()

From f9982d888508184bd9cd377a8931243b20042a39 Mon Sep 17 00:00:00 2001
From: Przemek Tredak
Date: Thu, 21 Nov 2019 15:19:33 -0800
Subject: [PATCH 6/6] Do not invoke TVM broadcast kernel

---
 tests/python/gpu/test_fusion.py | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/tests/python/gpu/test_fusion.py b/tests/python/gpu/test_fusion.py
index c0e6eb36152e..beffb353ef35 100644
--- a/tests/python/gpu/test_fusion.py
+++ b/tests/python/gpu/test_fusion.py
@@ -238,29 +238,9 @@ def test_fusion_compiler_cache():
     if num_gpus > 1:
         check_fused_symbol(a+b, ctx=mx.gpu(1), a=arr1, b=arr2)
 
-# @with_seed()
-# @use_np
-# def test_fusion_boolean_inputs():
-    # from mxnet.gluon import HybridBlock
-
-    # class Foo(HybridBlock):
-        # def __init__(self, prefix=None, params=None):
-            # super(Foo, self).__init__(prefix=prefix, params=params)
-
-        # def hybrid_forward(self, F, valid_length):
-            # mask = (F.np.ones((10,)) < valid_length).astype(np.float32)
-            # mask2 = (F.np.ones((10,)) < valid_length).astype(np.float32)
-            # mask = mask * F.np.expand_dims(mask2, axis=-1)
-            # return mask
-
-    # foo = Foo()
-    # foo.hybridize(static_alloc=True)
-    # out = foo(mx.np.ones((10,), ctx=mx.gpu()))
-    # mx.nd.waitall()
-
 @with_seed()
 @use_np
-def test_fusion_boolean_inputs_debug_tvm():
+def test_fusion_boolean_inputs():
     from mxnet.gluon import HybridBlock
 
     class Foo(HybridBlock):
@@ -268,13 +248,14 @@ def __init__(self, prefix=None, params=None):
             super(Foo, self).__init__(prefix=prefix, params=params)
 
         def hybrid_forward(self, F, valid_length):
-            mask = (F.np.ones((10,)) < valid_length)
+            mask = valid_length.astype(np.float32)
+            mask2 = valid_length.astype(np.float32)
+            mask = mask * F.np.expand_dims(mask2, axis=-1)
             return mask
-            mx.npx.waitall()
 
     foo = Foo()
     foo.hybridize(static_alloc=True)
-    out = foo(mx.np.ones((10,), ctx=mx.gpu()))
+    out = foo(mx.np.ones((10,), ctx=mx.gpu(), dtype=np.bool))
     mx.npx.waitall()