From 79fb8be3491ba713fb677441df6bfd439329b876 Mon Sep 17 00:00:00 2001 From: Wei Chu Date: Mon, 10 May 2021 13:00:56 -0700 Subject: [PATCH 1/2] fix shape of initial_h and initial_c --- .../_op_translations_opset12.py | 98 +++++++++---------- .../_op_translations_opset13.py | 82 ++++++++-------- tests/python-pytest/onnx/test_operators.py | 9 +- 3 files changed, 91 insertions(+), 98 deletions(-) diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py index e577a3abb3f2..2014b057f885 100644 --- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py +++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py @@ -199,15 +199,15 @@ def create_tensor(tensor_list, tensor_name, initializer, dtype='int64'): dims = np.shape(tensor_np) if dtype == np.float16: tensor_np = tensor_np.view(dtype=np.uint16) - initializer.append( - onnx.helper.make_tensor( - name=tensor_name, - data_type=data_type, - dims=dims, - vals=tensor_np.flatten().tolist(), - raw=False - ) + tensor = onnx.helper.make_tensor( + name=tensor_name, + data_type=data_type, + dims=dims, + vals=tensor_np.flatten().tolist(), + raw=False ) + initializer.append(tensor) + return tensor @mx_op.register("null") def convert_weights_and_inputs(node, **kwargs): @@ -4467,8 +4467,8 @@ def convert_RNN(node, **kwargs): from onnx import TensorProto name, input_nodes, attrs = get_inputs(node, kwargs) - mode = str(attrs.get('mode')) + mode = str(attrs.get('mode')) bidirectional = str(attrs.get('bidirectional', 'False')) if bidirectional != 'False' and mode not in ['lstm']: raise NotImplementedError('Currently RNN onnx export only supports bidirectional is False') @@ -4488,18 +4488,38 @@ def convert_RNN(node, **kwargs): raise NotImplementedError('Currently RNN onnx export only supports state_outputs equals to True') state_size = int(attrs.get('state_size')) + + direction = 1 + if bidirectional != 'False': + direction = 2 + data = input_nodes[0] param = input_nodes[1] - initial_h = input_nodes[2] + dtype = get_input_dtypes(node, kwargs)[2] - nodes = [] create_tensor([0], name+'_0', kwargs['initializer']) + create_tensor([1], name+'_1', kwargs['initializer']) + create_tensor([state_size], name+'_state_size', kwargs['initializer']) + create_tensor([direction], name+'_direction', kwargs['initializer']) + tensor_1 = create_tensor([1], name+'_1_f', kwargs['initializer'], dtype) + + nodes = [ + make_node('Shape', [data], [name+'_data_shape']), + make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), + make_node('Concat', [name+'_direction', name+'_batch_size', name+'_state_size'], [name+'_concat'], axis=0), + make_node('ConstantOfShape', [name+'_concat'], [name+'_COS'], value=tensor_1), + make_node('Mul', [input_nodes[2], name+'_COS'], [name+'initial_h']), + + ] if mode == 'lstm': - initial_c = input_nodes[3] + nodes += [ + make_node('Mul', [input_nodes[3], name+'_COS'], [name+'initial_c']), + ] + if num_layers == 2: if bidirectional != 'False': - raise NotImplementedError('Currently RNN onnx export only supports bidirectional is False') + raise NotImplementedError('Currently lstm onnx export only supports bidirectional when num_layers = 1') create_tensor([8*state_size], name+'_8*state_size', kwargs['initializer']) create_tensor([4*state_size*state_size], name+'_4*state_size^2', kwargs['initializer']) create_tensor([1, 4*state_size, state_size], name+'_WR_shape', 
kwargs['initializer']) @@ -4507,9 +4527,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*4*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_4*state_size^2'], [name+'_W0_1d']), @@ -4531,8 +4548,8 @@ def convert_RNN(node, **kwargs): name+'_B04', name+'_B07', name+'_B05', name+'_B06'], [name+'_B0_'], axis=0), make_node('Reshape', [name+'_B0_', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), - make_node('Split', [initial_c], [name+'_initial_c0', name+'_initial_c1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_c'], [name+'_initial_c0', name+'_initial_c1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), @@ -4572,7 +4589,6 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: if bidirectional == 'False': - create_tensor([1], name+'_1', kwargs['initializer']) create_tensor([4*state_size], name+'_4*state_size', kwargs['initializer']) create_tensor([8*state_size], name+'_8*state_size', kwargs['initializer']) create_tensor([4*state_size*state_size], name+'_4*state_size^2', kwargs['initializer']) @@ -4580,9 +4596,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 8*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], - [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W make_node('Mul', [name+'_4*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -4609,12 +4622,11 @@ def convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute LSTM - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h, initial_c], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size), make_node('Squeeze', [name+'0_'], [name], axes=[1]), ] else: - create_tensor([1], name+'_1', kwargs['initializer']) create_tensor([-1], name+'_-1', kwargs['initializer']) create_tensor([4*state_size], name+'_4*state_size', kwargs['initializer']) create_tensor([8*state_size], name+'_8*state_size', kwargs['initializer']) @@ -4623,9 +4635,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 8*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], - [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W_fwd make_node('Mul', [name+'_4*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -4682,7 +4691,7 @@ def convert_RNN(node, **kwargs): make_node('Concat', [name+'_W_fwd', name+'_W_bwd'], [name+'_W'], axis=0), make_node('Concat', [name+'_R_fwd', name+'_R_bwd'], [name+'_R'], axis=0), 
make_node('Concat', [name+'_B_fwd', name+'_B_bwd'], [name+'_B'], axis=0), - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h, initial_c], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size, direction='bidirectional'), make_node('Transpose', [name+'0_'], [name+'0_t'], perm=[0, 2, 1, 3]), make_node('Concat', [name+'_seq_length', name+'_batch_size', name+'_-1'], @@ -4701,9 +4710,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*3*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_3*state_size^2'], [name+'_W0_1d']), @@ -4725,7 +4731,7 @@ def convert_RNN(node, **kwargs): name+'_B04', name+'_B03', name+'_B05'], [name+'_B0_'], axis=0), make_node('Reshape', [name+'_B0_', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), @@ -4764,8 +4770,6 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: - - create_tensor([1], name+'_1', kwargs['initializer']) create_tensor([3*state_size], name+'_3*state_size', kwargs['initializer']) create_tensor([6*state_size], name+'_6*state_size', kwargs['initializer']) create_tensor([3*state_size*state_size], name+'_3*state_size^2', kwargs['initializer']) @@ -4773,8 +4777,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 6*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W make_node('Mul', [name+'_3*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -4799,8 +4801,8 @@ def convert_RNN(node, **kwargs): # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), - # compute LSTM - make_node('GRU', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h], + # compute GRU + make_node('GRU', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h'], [name+'0_', name+'1'], hidden_size=state_size, linear_before_reset=1), make_node('Squeeze', [name+'0_'], [name], axes=[1]), ] @@ -4812,7 +4814,6 @@ def convert_RNN(node, **kwargs): if mode == 'rnn_relu': activations = ['Relu'] if num_layers == 2: - create_tensor([2*state_size], name+'_2*state_size', kwargs['initializer']) create_tensor([state_size*state_size], name+'_state_size^2', kwargs['initializer']) create_tensor([1, state_size, state_size], name+'_WR_shape', kwargs['initializer']) @@ -4820,9 +4821,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', 
name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_state_size^2'], [name+'_W0_1d']), @@ -4836,13 +4834,14 @@ def convert_RNN(node, **kwargs): make_node('Slice', [param, name+'_WR_offset', name+'_B0_offset'], [name+'_B0_1d']), make_node('Reshape', [name+'_B0_1d', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # Layer 0 RNN - make_node('RNN', [data, name+'_W0', name+'_R0', name+'_B0', name+'_seq_len', name+'_initial_h0'], - [name+'_rnn0_out_', name+'_rnn0_h'], hidden_size=state_size, activations=activations), + make_node('RNN', [data, name+'_W0', name+'_R0', name+'_B0', name+'_seq_len', + name+'_initial_h0'], [name+'_rnn0_out_', name+'_rnn0_h'], + hidden_size=state_size, activations=activations), make_node('Squeeze', [name+'_rnn0_out_'], [name+'_rnn0_out'], axes=[1]), # Layer 1 @@ -4866,18 +4865,13 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: - - create_tensor([1], name+'_1', kwargs['initializer']) - create_tensor([state_size], name+'_state_size', kwargs['initializer']) + # create_tensor([state_size], name+'_state_size', kwargs['initializer']) create_tensor([2*state_size], name+'_2*state_size', kwargs['initializer']) create_tensor([state_size*state_size], name+'_state_size^2', kwargs['initializer']) create_tensor([1, state_size, state_size], name+'_R_shape', kwargs['initializer']) create_tensor([1, 2*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], - [name+'_seq_length', name+'_batch_size', name+'_input_size'], name='split0'), # get W make_node('Mul', [name+'_state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -4895,7 +4889,7 @@ def convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute RNN - make_node('RNN', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h], + make_node('RNN', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h'], [name+'0_', name+'1'], hidden_size=state_size, activations=activations), make_node('Squeeze', [name+'0_'], [name], axes=[1]), ] diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py index 92407d4e174b..856e9038f863 100644 --- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py +++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py @@ -200,15 +200,15 @@ def create_tensor(tensor_list, tensor_name, initializer, dtype='int64'): dims = np.shape(tensor_np) if dtype == np.float16: tensor_np = tensor_np.view(dtype=np.uint16) - initializer.append( - onnx.helper.make_tensor( - name=tensor_name, - data_type=data_type, - dims=dims, - vals=tensor_np.flatten().tolist(), - raw=False - ) + tensor = onnx.helper.make_tensor( + name=tensor_name, + data_type=data_type, + dims=dims, + vals=tensor_np.flatten().tolist(), + raw=False ) + initializer.append(tensor) + return 
tensor def create_helper_trans_node(node_name, input_node): @@ -1034,20 +1034,38 @@ def convert_RNN(node, **kwargs): raise NotImplementedError('Currently RNN onnx export only supports state_outputs equals to True') state_size = int(attrs.get('state_size')) + + direction = 1 + if bidirectional != 'False': + direction = 2 + data = input_nodes[0] param = input_nodes[1] - initial_h = input_nodes[2] - - nodes = [] + dtype = get_input_dtypes(node, kwargs)[2] create_tensor([0], name+'_0', kwargs['initializer']) create_tensor([1], name+'_1', kwargs['initializer']) + create_tensor([state_size], name+'_state_size', kwargs['initializer']) + create_tensor([direction], name+'_direction', kwargs['initializer']) + tensor_1 = create_tensor([1], name+'_1_f', kwargs['initializer'], dtype) + + nodes = [ + make_node('Shape', [data], [name+'_data_shape']), + make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), + make_node('Concat', [name+'_direction', name+'_batch_size', name+'_state_size'], [name+'_concat'], axis=0), + make_node('ConstantOfShape', [name+'_concat'], [name+'_COS'], value=tensor_1), + make_node('Mul', [input_nodes[2], name+'_COS'], [name+'initial_h']), + + ] if mode == 'lstm': - initial_c = input_nodes[3] + nodes += [ + make_node('Mul', [input_nodes[3], name+'_COS'], [name+'initial_c']), + ] + if num_layers == 2: if bidirectional != 'False': - raise NotImplementedError('Currently RNN onnx export only supports bidirectional is False') + raise NotImplementedError('Currently lstm onnx export only supports bidirectional when num_layers = 1') create_tensor([8*state_size], name+'_8*state_size', kwargs['initializer']) create_tensor([4*state_size*state_size], name+'_4*state_size^2', kwargs['initializer']) create_tensor([1, 4*state_size, state_size], name+'_WR_shape', kwargs['initializer']) @@ -1055,9 +1073,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*4*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_4*state_size^2'], [name+'_W0_1d']), @@ -1079,8 +1094,8 @@ def convert_RNN(node, **kwargs): name+'_B04', name+'_B07', name+'_B05', name+'_B06'], [name+'_B0_'], axis=0), make_node('Reshape', [name+'_B0_', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), - make_node('Split', [initial_c], [name+'_initial_c0', name+'_initial_c1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_c'], [name+'_initial_c0', name+'_initial_c1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), @@ -1127,9 +1142,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 8*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], - [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W make_node('Mul', [name+'_4*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -1156,7 +1168,7 @@ def convert_RNN(node, **kwargs): make_node('Tile', 
[name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute LSTM - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h, initial_c], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size), make_node('Squeeze', [name+'0_', name+'_1'], [name]), ] @@ -1169,9 +1181,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 8*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], - [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W_fwd make_node('Mul', [name+'_4*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -1228,7 +1237,7 @@ def convert_RNN(node, **kwargs): make_node('Concat', [name+'_W_fwd', name+'_W_bwd'], [name+'_W'], axis=0), make_node('Concat', [name+'_R_fwd', name+'_R_bwd'], [name+'_R'], axis=0), make_node('Concat', [name+'_B_fwd', name+'_B_bwd'], [name+'_B'], axis=0), - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h, initial_c], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size, direction='bidirectional'), make_node('Transpose', [name+'0_'], [name+'0_t'], perm=[0, 2, 1, 3]), make_node('Concat', [name+'_seq_length', name+'_batch_size', name+'_-1'], @@ -1247,9 +1256,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*3*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_3*state_size^2'], [name+'_W0_1d']), @@ -1271,7 +1277,7 @@ def convert_RNN(node, **kwargs): name+'_B04', name+'_B03', name+'_B05'], [name+'_B0_'], axis=0), make_node('Reshape', [name+'_B0_', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), @@ -1317,8 +1323,6 @@ def convert_RNN(node, **kwargs): create_tensor([1, 6*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), # get W make_node('Mul', [name+'_3*state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -1344,7 +1348,7 @@ def convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute GRU - make_node('GRU', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h], + make_node('GRU', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h'], [name+'0_', name+'1'], 
hidden_size=state_size, linear_before_reset=1), make_node('Squeeze', [name+'0_', name+'_1'], [name]), ] @@ -1363,9 +1367,6 @@ def convert_RNN(node, **kwargs): create_tensor([4*state_size*state_size], name+'_WR_offset', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', name+'_input_size']), - # Layer 0 # get W make_node('Slice', [param, name+'_0', name+'_state_size^2'], [name+'_W0_1d']), @@ -1379,7 +1380,7 @@ def convert_RNN(node, **kwargs): make_node('Slice', [param, name+'_WR_offset', name+'_B0_offset'], [name+'_B0_1d']), make_node('Reshape', [name+'_B0_1d', name+'_B_shape'], [name+'_B0']), # get initial states - make_node('Split', [initial_h], [name+'_initial_h0', name+'_initial_h1'], axis=0), + make_node('Split', [name+'initial_h'], [name+'_initial_h0', name+'_initial_h1'], axis=0), # get seq_len make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), @@ -1410,16 +1411,13 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: - create_tensor([state_size], name+'_state_size', kwargs['initializer']) + # create_tensor([state_size], name+'_state_size', kwargs['initializer']) create_tensor([2*state_size], name+'_2*state_size', kwargs['initializer']) create_tensor([state_size*state_size], name+'_state_size^2', kwargs['initializer']) create_tensor([1, state_size, state_size], name+'_R_shape', kwargs['initializer']) create_tensor([1, 2*state_size], name+'_B_shape', kwargs['initializer']) nodes += [ - make_node('Shape', [data], [name+'_data_shape']), - make_node('Split', [name+'_data_shape'], [name+'_seq_length', name+'_batch_size', - name+'_input_size'], name='split0'), # get W make_node('Mul', [name+'_state_size', name+'_input_size'], [name+'_mul0']), make_node('Slice', [param, name+'_0', name+'_mul0'], [name+'_W_1d']), @@ -1437,7 +1435,7 @@ def convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute RNN - make_node('RNN', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', initial_h], + make_node('RNN', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h'], [name+'0_', name+'1'], hidden_size=state_size, activations=activations), make_node('Squeeze', [name+'0_', name+'_1'], [name]), ] diff --git a/tests/python-pytest/onnx/test_operators.py b/tests/python-pytest/onnx/test_operators.py index d39ba896dd99..dc6d389833a7 100644 --- a/tests/python-pytest/onnx/test_operators.py +++ b/tests/python-pytest/onnx/test_operators.py @@ -1235,6 +1235,7 @@ def test_onnx_export_sequence_reverse(tmp_path, dtype, params): M1 = def_model('SequenceReverse', use_sequence_length=True) op_export_test('SequenceReverse1', M1, [x, seq_len], tmp_path) + @pytest.mark.parametrize('mode', ['lstm', 'gru', 'rnn_tanh', 'rnn_relu']) @pytest.mark.parametrize('dtype', ['float32']) @pytest.mark.parametrize('state_size', [16, 32]) @@ -1263,13 +1264,13 @@ def test_onnx_export_RNN(tmp_path, mode, dtype, state_size, input_size, num_laye factor = 4 M = def_model('RNN', mode=mode, state_size=state_size, state_outputs=True, num_layers=num_layers, p=0, bidirectional=bidirectional) - x = mx.nd.random.normal(0, 10, (seq_length, batch_size, input_size), dtype=dtype) + x = mx.nd.random.normal(0, 10, (seq_length, batch_size, 
input_size)).astype(dtype) param = mx.nd.random.normal(0, 1, [b*num_layers*factor*state_size*input_size + b*num_layers*factor*state_size*state_size + - b*num_layers*2*factor*state_size], dtype=dtype) - state = mx.nd.random.uniform(-1, 1, [b*num_layers, batch_size, state_size], dtype=dtype) + b*num_layers*2*factor*state_size]).astype(dtype) + state = mx.nd.random.uniform(-1, 1, [b*num_layers, batch_size, state_size]).astype(dtype) if mode == 'lstm': - cell = mx.nd.random.uniform(-1, 1, [b*num_layers, batch_size, state_size], dtype=dtype) + cell = mx.nd.random.uniform(-1, 1, [b*num_layers, batch_size, state_size]).astype(dtype) op_export_test('rnn', M, [x, param, state, cell], tmp_path) elif mode == 'rnn_relu': # set large atol as relu can outputs big numbers From fa4231ecd0a81a8ab6e7ebb8086c29bdb4dde5df Mon Sep 17 00:00:00 2001 From: Wei Chu Date: Mon, 10 May 2021 13:26:18 -0700 Subject: [PATCH 2/2] fix sanity --- .../mx2onnx/_op_translations/_op_translations_opset12.py | 7 ++++--- .../mx2onnx/_op_translations/_op_translations_opset13.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py index 2014b057f885..c535ce9db219 100644 --- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py +++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py @@ -4622,7 +4622,8 @@ def convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute LSTM - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', + name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size), make_node('Squeeze', [name+'0_'], [name], axes=[1]), ] @@ -4691,7 +4692,8 @@ def convert_RNN(node, **kwargs): make_node('Concat', [name+'_W_fwd', name+'_W_bwd'], [name+'_W'], axis=0), make_node('Concat', [name+'_R_fwd', name+'_R_bwd'], [name+'_R'], axis=0), make_node('Concat', [name+'_B_fwd', name+'_B_bwd'], [name+'_B'], axis=0), - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', + name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size, direction='bidirectional'), make_node('Transpose', [name+'0_'], [name+'0_t'], perm=[0, 2, 1, 3]), make_node('Concat', [name+'_seq_length', name+'_batch_size', name+'_-1'], @@ -4865,7 +4867,6 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: - # create_tensor([state_size], name+'_state_size', kwargs['initializer']) create_tensor([2*state_size], name+'_2*state_size', kwargs['initializer']) create_tensor([state_size*state_size], name+'_state_size^2', kwargs['initializer']) create_tensor([1, state_size, state_size], name+'_R_shape', kwargs['initializer']) diff --git a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py index 856e9038f863..25c518faf6af 100644 --- a/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py +++ b/python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py @@ -1168,7 +1168,8 @@ def 
convert_RNN(node, **kwargs): make_node('Tile', [name+'_seq_length', name+'_batch_size'], [name+'_seq_len_']), make_node("Cast", [name+'_seq_len_'], [name+"_seq_len"], to=int(TensorProto.INT32)), # compute LSTM - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', + name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size), make_node('Squeeze', [name+'0_', name+'_1'], [name]), ] @@ -1237,7 +1238,8 @@ def convert_RNN(node, **kwargs): make_node('Concat', [name+'_W_fwd', name+'_W_bwd'], [name+'_W'], axis=0), make_node('Concat', [name+'_R_fwd', name+'_R_bwd'], [name+'_R'], axis=0), make_node('Concat', [name+'_B_fwd', name+'_B_bwd'], [name+'_B'], axis=0), - make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', name+'_seq_len', name+'initial_h', name+'initial_c'], + make_node('LSTM', [data, name+'_W', name+'_R', name+'_B', + name+'_seq_len', name+'initial_h', name+'initial_c'], [name+'0_', name+'1', name+'2'], hidden_size=state_size, direction='bidirectional'), make_node('Transpose', [name+'0_'], [name+'0_t'], perm=[0, 2, 1, 3]), make_node('Concat', [name+'_seq_length', name+'_batch_size', name+'_-1'], @@ -1411,7 +1413,6 @@ def convert_RNN(node, **kwargs): ] elif num_layers == 1: - # create_tensor([state_size], name+'_state_size', kwargs['initializer']) create_tensor([2*state_size], name+'_2*state_size', kwargs['initializer']) create_tensor([state_size*state_size], name+'_state_size^2', kwargs['initializer']) create_tensor([1, state_size, state_size], name+'_R_shape', kwargs['initializer'])
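
For context, the new Shape -> Split -> Concat -> ConstantOfShape -> Mul subgraph added in this patch makes sure the LSTM/GRU/RNN node always receives initial_h (and initial_c for lstm) in the shape ONNX expects, (num_directions, batch_size, state_size), with batch_size taken from the runtime shape of the data input instead of being baked into an initializer. Below is a minimal numpy sketch of what that subgraph computes; the concrete sizes (seq_length, batch_size, etc.) are illustrative assumptions, not values taken from the patch:

    import numpy as np

    seq_length, batch_size, input_size = 5, 3, 10
    state_size, num_directions = 4, 1

    # Shape -> Split: recover the runtime batch size from the data input.
    data = np.zeros((seq_length, batch_size, input_size), dtype=np.float32)
    _, runtime_batch, _ = data.shape

    # Concat -> ConstantOfShape: build a tensor of ones with the target state shape.
    ones = np.ones((num_directions, runtime_batch, state_size), dtype=np.float32)

    # Mul: broadcasting the supplied initial state against the ones tensor pins it to
    # (num_directions, batch_size, state_size) before it reaches the LSTM/GRU/RNN node.
    initial_h = np.random.uniform(-1, 1, (num_directions, batch_size, state_size)).astype(np.float32)
    fixed_initial_h = initial_h * ones
    assert fixed_initial_h.shape == (num_directions, runtime_batch, state_size)

Deriving the target shape at run time this way should keep the exported graph valid when the batch dimension is dynamic, which appears to be why the patch multiplies the user-supplied states rather than reshaping them against constant dimensions.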