From b6a6475621d907f9dc82401830a874304b2fc8f1 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 12 Aug 2021 10:03:57 +0300 Subject: [PATCH 1/9] GRU cell was implemented in common.py. GRU was supported on pytorch frontend side --- python/tvm/relay/frontend/common.py | 90 ++++++++++++ python/tvm/relay/frontend/pytorch.py | 201 ++++++++++++++++++++++++++- 2 files changed, 290 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index 077b942ddf01..a635dcdc4771 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -658,6 +658,96 @@ def unbind(data, axis=0): return _expr.TupleWrapper(_expr.Tuple(ret), selections) +def gru_cell( + input_seqs, + hidden_state, + hidden_size, + w_inp, + w_hid, + b_inp=None, + b_hid=None, + r_act=_op.sigmoid, + z_act=_op.sigmoid, + n_act=_op.tanh, + backwards=False, +): + """ + Common implementation of GRU cell for all frontends of TVM + TODO(vvchernov): currently it is used by pytorch. Extend for other frontends + + Parameters + ---------- + input_seqs : List[relay.Expr] + The sequence of input tensors + Input tensor should be 2d while issue #8412 is not resolved + Shape = (batch, feature_size) + hidden_state : relay.Expr + Hidden state. shape = (batch_size, hidden_size) + hidden_size : int + The number of features in the hidden state. It is needed for correct and quick split of weights. + w_inp, w_hid : relay.Expr + weight matrices. wi shape = (3 * hidden_size, feature_size) + wh shape = (3 * hidden_size, hidden_size) + NOTE: wi = (w_ir|w_iz|w_in) for reset, update and new gates. + The order is important for correct GRU calculation! + b_inp, b_hid : relay.Expr + bias matrices. The same order of internal parts as for weights. shape = (3 * hidden_size) + r_act : relay.op + activation funtion for reset gate. it is sigmoid by default + z_act : relay.op + activation funtion for update gate. 
it is sigmoid by default + n_act : relay.op + activation funtion for new gate. it is tanh by default + backwards : bool + Flag for reverse pass of GRU + + Returns + ------- + result : List[relay.Expr], relay.Expr, relay.Expr + The sequence of computed result, final hidden and cell state + """ + + outputs_list = [] + for x_t in input_seqs if not backwards else reversed(input_seqs): + # x_t shape = (batch, feature size), step shape = (batch, feature size + hidden_size) + step = _op.concatenate([x_t, hidden_state], axis=1) + w_irz, w_in = _op.split(w_inp, [2*hidden_size], axis=0) + w_hrz, w_hn = _op.split(w_hid, [2*hidden_size], axis=0) + cat_w = _op.concatenate([w_irz, w_hrz], axis=1) + # Instead of nn.dense(x_t, w_inp) + nn.dense(hidden_state, w_hid) + # nn.dense(step, cat_w) is used + # gates shape = (batch, 2 * hidden_size) + rz_gates = _op.nn.dense(step, cat_w) + # Add biases + if b_inp is not None: + b_irz, b_in = _op.split(b_inp, [2*hidden_size], axis=0) + rz_gates += b_irz + if b_hid is not None: + b_hrz, b_hn = _op.split(b_hid, [2*hidden_size], axis=0) + rz_gates += b_hrz + # TODO(vvchernov): check similarity of r_act and z_act and change sequence act->split + # any gate shape = (batch, hidden_size) + r_gate, z_gate = _op.split(rz_gates, 2, axis=-1) + + r_gate = r_act(r_gate) + z_gate = z_act(z_gate) + + ni_gate = _op.nn.dense(x_t, w_in) + if b_inp is not None: + ni_gate += b_in + nh_gate = _op.nn.dense(hidden_state, w_hn) + if b_hid is not None: + nh_gate += b_hn + + n_gate = n_act(ni_gate + r_gate * nh_gate) + + hidden_state = (_op.ones_like(z_gate) - z_gate) * n_gate + z_gate * hidden_state + + outputs_list.append(hidden_state) # [seq_num, (batch, hidden_size)] + + return outputs_list, hidden_state + + def lstm_cell( input_seqs, hidden_state, diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index 7c10889ce17e..741e7f0d2be8 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ 
-39,7 +39,7 @@ from ..prelude import Prelude, StaticTensorArrayOps from ..ty import Any, TensorType, TupleType from . import qnn_torch -from .common import AttrCvt, get_relay_op, unbind, lstm_cell +from .common import AttrCvt, get_relay_op, unbind, lstm_cell, gru_cell from .common import infer_value as _infer_value from .common import infer_shape as _infer_shape from .common import infer_value_simulated as _infer_value_simulated @@ -2315,6 +2315,204 @@ def flip(self, inputs, input_types): axis = inputs[1] return _op.transform.reverse(data, axis=axis[0]) + def bidir_gru_cell( + self, + input_seqs, + hidden_size, + weights_dicts, + ): + """ + Bidirectional GRU cell + """ + seq_len = len(input_seqs) + forward_outputs, fw_H_t = gru_cell( + input_seqs, + hidden_size=hidden_size, + **weights_dicts[0], + ) + + reverse_outputs, rev_H_t = gru_cell( + input_seqs, + hidden_size=hidden_size, + **weights_dicts[1], + backwards=True, + ) + + final_outputs = [] + for i in range(seq_len): + final_outputs.append( + _op.concatenate([forward_outputs[i], reverse_outputs[seq_len - 1 - i]], axis=-1) + ) + + return final_outputs, _op.stack([fw_H_t, rev_H_t], axis=0) + + def gru_layers(self, input_data, layer_weights_dicts, bidirectional, hidden_size, dropout_p=0.0): + """ + Methods iterates layers for Stacked LSTM + """ + layers_num = len(layer_weights_dicts) + # split input sequence to samples set + input_seqs = unbind(input_data, 0) # [seq_num, (batch, feature_size)] + output_hiddens = [] + for i in range(layers_num): + weights_dicts = layer_weights_dicts[i] + # input_seqs shape = [seq_num, (batch, feature_size)] or + # [seq_num, (batch, 2*feature_size)] for bidirectional + if bidirectional: + input_seqs, H_t = self.bidir_gru_cell(input_seqs, hidden_size, weights_dicts) + else: + input_seqs, H_t = gru_cell(input_seqs, **weights_dicts[0], hidden_size=hidden_size) + + output_hiddens.append(H_t) + + # TODO (vvchernov): in pytorch implementation train is also checked + # see 
https://github.com/pytorch/pytorch/blob/70c8daf43946b53af6493d058899ef952d27d339 + # /aten/src/ATen/native/RNN.cpp#L1054 + if dropout_p != 0 and i < layers_num - 1: + # for input in input_seqs: + # input = _op.dropout(input, dropout_p) + raise NotImplementedError("Dropout for LSTM has not been supported yet!") + final_hiddens = [] + if bidirectional: + for output_hidden in output_hiddens: + final_hiddens.append(output_hidden[0]) + final_hiddens.append(output_hidden[1]) + else: + final_hiddens = output_hiddens + + return _op.stack(input_seqs, 0), final_hiddens + + def gru(self, inputs, input_types): + """ + Description of GRU in pytorch:https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#torch.nn.GRU + """ + # TODO (vvchernov): support dropout + assert len(inputs) == 9, "Input of size 9 is expected" + # Unpack inputs, note that if optional and not provided then value will be None. + _X = inputs[0] + # _X shape (seq_num, batch, feature_size) or (batch, seq_num, feature_size) + + hidden_state = inputs[1] + # Hidden state shape (hidden_layers_num, batch, hidden_size) + + _weights = inputs[2] + # Wi layer[0] shape (3 * hidden_size, feature_size) + # Wh layer[0] shape (3 * hidden_size, hidden_size) + # Bi layer[0] shape (3 * hidden_size) + # Bh layer[0] shape (3 * hidden_size) + + # Wi layer[>0] shape (3 * hidden_size, hidden_size * num_directions) + # Wh layer[>0] shape (3 * hidden_size, hidden_size) + # Bi layer[>0] shape (3 * hidden_size) + # Bh layer[>0] shape (3 * hidden_size) + + # Scalar inputs + has_biases = inputs[3] + num_layers = inputs[4] + dropout_p = inputs[5] # dropout probability, if 0.0 it means there is no dropout + # train = inputs[6] + bidirectional = inputs[7] + batch_first = inputs[8] + + num_directions = 1 + if bidirectional: + num_directions = 2 + + rsd = len(_weights) % num_layers + assert rsd == 0, "The number of weights must be a multiple of the number of layers!" 
+ rsd = (len(_weights) / num_layers) % num_directions + assert ( + rsd == 0 + ), "The number of weights in layer must be a multiple of the number of directions!" + + weights_num = int(len(_weights) / num_layers / num_directions) + if has_biases: + assert weights_num == 4, "The weights number in layer is expected equal to 4" + else: + assert weights_num == 2, "The weights number in layer is expected equal to 2" + + X = _op.transpose(_X, (1, 0, 2)) if batch_first else _X + # TODO (vvchernov): Which data type should be used? from input or weights? + # Instead of it _infer_type(X).checked_type.dtype can be used + X_dtype = input_types[0] + X_shape = _infer_shape(X) # (seq_num, batch, feature_size) + + hidden_size = _infer_shape(_weights[0])[0] / 3 + batch_size = X_shape[1] + + # Initialize hidden states if not provided. + layers_h = [] + hidden_layers_num = num_directions * num_layers + if hidden_state is None: + h_0 = _op.zeros((batch_size, hidden_size), X_dtype) + for i in range(hidden_layers_num): + layers_h.append(h_0) + else: + layers_h = unbind(hidden_state, 0) + + layer_weights_dicts = [] + k = 0 # layer counter + if has_biases: + names = ["hidden_state", "w_inp", "w_hid", "b_inp", "b_hid"] + if bidirectional: + rsd = len(_weights) % (2 * weights_num) + assert rsd == 0, "got an incorrect number of LSTM weights" + for i in range(0, len(_weights), 2 * weights_num): + fw_tensors = [layers_h[2 * k], *_weights[i : i + 4]] + fw_weights_dict = dict(zip(names, fw_tensors)) + j = i + weights_num + rev_tensors = [layers_h[2 * k + 1], *_weights[j : j + 4]] + rev_weights_dict = dict(zip(names, rev_tensors)) + layer_weights_dicts.append([fw_weights_dict, rev_weights_dict]) + k += 1 + else: + assert len(_weights) % weights_num == 0, "got an incorrect number of LSTM weights" + for i in range(0, len(_weights), weights_num): + fw_tensors = [layers_h[k], *_weights[i : i + 4]] + fw_weights_dict = dict(zip(names, fw_tensors)) + layer_weights_dicts.append([fw_weights_dict]) + k += 1 
+ else: + names = ["hidden_state", "w_inp", "w_hid"] + if bidirectional: + rsd = len(_weights) % (2 * weights_num) + assert rsd == 0, "got an incorrect number of LSTM weights" + for i in range(0, len(_weights), 2 * weights_num): + fw_tensors = [layers_h[2 * k], *_weights[i : i + 2]] + fw_weights_dict = dict(zip(names, fw_tensors)) + j = i + weights_num + rev_tensors = [layers_h[2 * k + 1], *_weights[j : j + 2]] + rev_weights_dict = dict(zip(names, rev_tensors)) + layer_weights_dicts.append([fw_weights_dict, rev_weights_dict]) + k += 1 + else: + assert len(_weights) % weights_num == 0, "got an incorrect number of LSTM weights" + for i in range(0, len(_weights), weights_num): + fw_tensors = [layers_h[k], *_weights[i : i + 2]] + fw_weights_dict = dict(zip(names, fw_tensors)) + layer_weights_dicts.append([fw_weights_dict]) + k += 1 + assert ( + len(layer_weights_dicts) == num_layers and k == num_layers + ), "For stacked GRU number of weights sets should be the same as number of layers!" + + outputs = self.gru_layers( + X, + layer_weights_dicts, + bidirectional, + hidden_size=hidden_size, + dropout_p=dropout_p, + ) + + # output shape = (seq_num, batch, hidden_size) or + # (seq_num, batch, 2*feature_size) for bidirectional + output = outputs[0] + + if batch_first: + output = _op.transpose(output, (1, 0, 2)) + + return (output, _op.stack(outputs[1], 0)) + def bidir_lstm_cell( self, input_seqs, @@ -2792,6 +2990,7 @@ def create_convert_map(self): "aten::nll_loss": self.nll_loss, "aten::nll_loss2d": self.nll_loss, "aten::flip": self.flip, + "aten::gru": self.gru, "aten::lstm": self.lstm, } From 549ff68b91637dbb221d831ee7dcc2bcb7929fdb Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 16 Aug 2021 12:01:21 +0300 Subject: [PATCH 2/9] update GRU in common.py and onnx frontend --- python/tvm/relay/frontend/common.py | 87 +++++++++++++++++++---------- python/tvm/relay/frontend/onnx.py | 2 +- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git 
a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index a635dcdc4771..bb703081adb7 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -666,10 +666,10 @@ def gru_cell( w_hid, b_inp=None, b_hid=None, - r_act=_op.sigmoid, - z_act=_op.sigmoid, + rz_act=_op.sigmoid, n_act=_op.tanh, backwards=False, + linear_before_reset=True, ): """ Common implementation of GRU cell for all frontends of TVM @@ -710,38 +710,63 @@ def gru_cell( outputs_list = [] for x_t in input_seqs if not backwards else reversed(input_seqs): # x_t shape = (batch, feature size), step shape = (batch, feature size + hidden_size) - step = _op.concatenate([x_t, hidden_state], axis=1) - w_irz, w_in = _op.split(w_inp, [2*hidden_size], axis=0) - w_hrz, w_hn = _op.split(w_hid, [2*hidden_size], axis=0) - cat_w = _op.concatenate([w_irz, w_hrz], axis=1) - # Instead of nn.dense(x_t, w_inp) + nn.dense(hidden_state, w_hid) - # nn.dense(step, cat_w) is used - # gates shape = (batch, 2 * hidden_size) - rz_gates = _op.nn.dense(step, cat_w) - # Add biases + # step = _op.concatenate([x_t, hidden_state], axis=1) + # w_irz, w_in = _op.split(w_inp, [2*hidden_size], axis=0) + # w_hrz, w_hn = _op.split(w_hid, [2*hidden_size], axis=0) + # cat_w = _op.concatenate([w_irz, w_hrz], axis=1) + # # Instead of nn.dense(x_t, w_inp) + nn.dense(hidden_state, w_hid) + # # nn.dense(step, cat_w) is used + # # gates shape = (batch, 2 * hidden_size) + # rz_gates = _op.nn.dense(step, cat_w) + # # Add biases + # if b_inp is not None: + # b_irz, b_in = _op.split(b_inp, [2*hidden_size], axis=0) + # rz_gates += b_irz + # if b_hid is not None: + # b_hrz, b_hn = _op.split(b_hid, [2*hidden_size], axis=0) + # rz_gates += b_hrz + # # TODO(vvchernov): check similarity of r_act and z_act and change sequence act->split + # # any gate shape = (batch, hidden_size) + # r_gate, z_gate = _op.split(rz_gates, 2, axis=-1) + + # r_gate = r_act(r_gate) + # z_gate = z_act(z_gate) + + # ni_gate = 
_op.nn.dense(x_t, w_in) + # if b_inp is not None: + # ni_gate += b_in + # nh_gate = _op.nn.dense(hidden_state, w_hn) + # if b_hid is not None: + # nh_gate += b_hn + + # n_gate = n_act(ni_gate + r_gate * nh_gate) + + xwt = _op.nn.dense(x_t, w_inp) + i_r, i_z, i_n = _op.split(xwt, 3, axis=1) + h_r, h_z, h_n = _op.split(w_hid, 3, axis=0) + r_gate = i_r + _op.nn.dense(hidden_state, h_r) + z_gate = i_z + _op.nn.dense(hidden_state, h_z) + # TODO(vvchernov): It is assumed that both bias are or not if b_inp is not None: - b_irz, b_in = _op.split(b_inp, [2*hidden_size], axis=0) - rz_gates += b_irz - if b_hid is not None: - b_hrz, b_hn = _op.split(b_hid, [2*hidden_size], axis=0) - rz_gates += b_hrz - # TODO(vvchernov): check similarity of r_act and z_act and change sequence act->split - # any gate shape = (batch, hidden_size) - r_gate, z_gate = _op.split(rz_gates, 2, axis=-1) - - r_gate = r_act(r_gate) - z_gate = z_act(z_gate) - - ni_gate = _op.nn.dense(x_t, w_in) - if b_inp is not None: - ni_gate += b_in - nh_gate = _op.nn.dense(hidden_state, w_hn) - if b_hid is not None: - nh_gate += b_hn + i_br, i_bz, i_bn = _op.split(b_inp, 3, axis=-1) + h_br, h_bz, h_bn = _op.split(b_hid, 3, axis=-1) + z_gate += i_bz + h_bz + r_gate += i_br + h_br + if linear_before_reset: + n_gate = i_n + i_bn + (r_gate * (_op.nn.dense(hidden_state, h_n) + h_bn)) + else: + n_gate = i_n + i_bn + _op.nn.dense((r_gate * hidden_state), h_n) + h_bn + else: + if linear_before_reset: + n_gate = i_n + (r_gate * (_op.nn.dense(hidden_state, h_n))) + else: + n_gate = i_n + _op.nn.dense((r_gate * hidden_state), h_n) - n_gate = n_act(ni_gate + r_gate * nh_gate) + r_gate = rz_act(r_gate) + z_gate = rz_act(z_gate) + n_gate = n_act(n_gate) - hidden_state = (_op.ones_like(z_gate) - z_gate) * n_gate + z_gate * hidden_state + hidden_state = (hidden_state - n_gate) * z_gate + n_gate outputs_list.append(hidden_state) # [seq_num, (batch, hidden_size)] diff --git a/python/tvm/relay/frontend/onnx.py 
b/python/tvm/relay/frontend/onnx.py index 0f78c32ef59f..dd47d4d7ff99 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2386,7 +2386,7 @@ def generate_gru( r = f_act(r) h = g_act(h) - H_t = ((_expr.const(1, dtype=W_dtype) - z) * h) + (z * H_t) + H_t = (H_t - h) * z + h h_list.append(_op.expand_dims(H_t, axis=0)) if backwards: From 59eefa1f84fccb09f70f9b6fddcb8ce425807845 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Tue, 17 Aug 2021 11:58:44 +0300 Subject: [PATCH 3/9] fix issue related to GRU accuracy in pytorch and ONNX frontend --- python/tvm/relay/frontend/common.py | 25 +++++++++++++------------ python/tvm/relay/frontend/onnx.py | 3 ++- python/tvm/relay/frontend/pytorch.py | 15 +++------------ 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index bb703081adb7..9e16425e0868 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -743,26 +743,27 @@ def gru_cell( xwt = _op.nn.dense(x_t, w_inp) i_r, i_z, i_n = _op.split(xwt, 3, axis=1) - h_r, h_z, h_n = _op.split(w_hid, 3, axis=0) - r_gate = i_r + _op.nn.dense(hidden_state, h_r) - z_gate = i_z + _op.nn.dense(hidden_state, h_z) + w_hr, w_hz, w_hn = _op.split(w_hid, 3, axis=0) + r_gate = i_r + _op.nn.dense(hidden_state, w_hr) + z_gate = i_z + _op.nn.dense(hidden_state, w_hz) # TODO(vvchernov): It is assumed that both bias are or not if b_inp is not None: - i_br, i_bz, i_bn = _op.split(b_inp, 3, axis=-1) - h_br, h_bz, h_bn = _op.split(b_hid, 3, axis=-1) - z_gate += i_bz + h_bz - r_gate += i_br + h_br + b_ir, b_iz, b_in = _op.split(b_inp, 3, axis=-1) + b_hr, b_hz, b_hn = _op.split(b_hid, 3, axis=-1) + r_gate += b_ir + b_hr + r_gate = rz_act(r_gate) + z_gate += b_iz + b_hz if linear_before_reset: - n_gate = i_n + i_bn + (r_gate * (_op.nn.dense(hidden_state, h_n) + h_bn)) + n_gate = i_n + b_in + (r_gate * (_op.nn.dense(hidden_state, w_hn) + b_hn)) 
else: - n_gate = i_n + i_bn + _op.nn.dense((r_gate * hidden_state), h_n) + h_bn + n_gate = i_n + b_in + _op.nn.dense((r_gate * hidden_state), w_hn) + b_hn else: + r_gate = rz_act(r_gate) if linear_before_reset: - n_gate = i_n + (r_gate * (_op.nn.dense(hidden_state, h_n))) + n_gate = i_n + (r_gate * (_op.nn.dense(hidden_state, w_hn))) else: - n_gate = i_n + _op.nn.dense((r_gate * hidden_state), h_n) + n_gate = i_n + _op.nn.dense((r_gate * hidden_state), w_hn) - r_gate = rz_act(r_gate) z_gate = rz_act(z_gate) n_gate = n_act(n_gate) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index dd47d4d7ff99..fe3dcb6c0792 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -2372,18 +2372,19 @@ def generate_gru( rbz, rbr, rbh = _op.split(RB, 3, axis=-1) z += wbz + rbz r += wbr + rbr + r = f_act(r) if linear_before_reset: h = ch + (r * (_op.nn.dense(H_t, rh) + rbh)) + wbh else: h = ch + _op.nn.dense((r * H_t), rh) + wbh + rbh else: + r = f_act(r) if linear_before_reset: h = ch + (r * (_op.nn.dense(H_t, rh))) else: h = ch + _op.nn.dense((r * H_t), rh) z = f_act(z) - r = f_act(r) h = g_act(h) H_t = (H_t - h) * z + h diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index 741e7f0d2be8..dff1ee3153a0 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -2372,15 +2372,8 @@ def gru_layers(self, input_data, layer_weights_dicts, bidirectional, hidden_size # for input in input_seqs: # input = _op.dropout(input, dropout_p) raise NotImplementedError("Dropout for LSTM has not been supported yet!") - final_hiddens = [] - if bidirectional: - for output_hidden in output_hiddens: - final_hiddens.append(output_hidden[0]) - final_hiddens.append(output_hidden[1]) - else: - final_hiddens = output_hiddens - return _op.stack(input_seqs, 0), final_hiddens + return _op.stack(input_seqs, 0), _op.stack(output_hiddens, 0) def gru(self, inputs, 
input_types): """ @@ -2496,7 +2489,7 @@ def gru(self, inputs, input_types): len(layer_weights_dicts) == num_layers and k == num_layers ), "For stacked GRU number of weights sets should be the same as number of layers!" - outputs = self.gru_layers( + output, out_hidden_state = self.gru_layers( X, layer_weights_dicts, bidirectional, @@ -2506,12 +2499,10 @@ def gru(self, inputs, input_types): # output shape = (seq_num, batch, hidden_size) or # (seq_num, batch, 2*feature_size) for bidirectional - output = outputs[0] - if batch_first: output = _op.transpose(output, (1, 0, 2)) - return (output, _op.stack(outputs[1], 0)) + return (output, out_hidden_state) def bidir_lstm_cell( self, From d2f49145d9ab07837c76d72813d14e34a487b421 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Tue, 17 Aug 2021 20:04:49 +0300 Subject: [PATCH 4/9] small fixes and remove excess --- python/tvm/relay/frontend/common.py | 35 ---------------------------- python/tvm/relay/frontend/pytorch.py | 15 +++++------- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index 9e16425e0868..517b63dca273 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -661,7 +661,6 @@ def unbind(data, axis=0): def gru_cell( input_seqs, hidden_state, - hidden_size, w_inp, w_hid, b_inp=None, @@ -683,8 +682,6 @@ def gru_cell( Shape = (batch, feature_size) hidden_state : relay.Expr Hidden state. shape = (batch_size, hidden_size) - hidden_size : int - The number of features in the hidden state. It is needed for correct and quick split of weights. w_inp, w_hid : relay.Expr weight matrices. 
wi shape = (3 * hidden_size, feature_size) wh shape = (3 * hidden_size, hidden_size) @@ -709,38 +706,6 @@ def gru_cell( outputs_list = [] for x_t in input_seqs if not backwards else reversed(input_seqs): - # x_t shape = (batch, feature size), step shape = (batch, feature size + hidden_size) - # step = _op.concatenate([x_t, hidden_state], axis=1) - # w_irz, w_in = _op.split(w_inp, [2*hidden_size], axis=0) - # w_hrz, w_hn = _op.split(w_hid, [2*hidden_size], axis=0) - # cat_w = _op.concatenate([w_irz, w_hrz], axis=1) - # # Instead of nn.dense(x_t, w_inp) + nn.dense(hidden_state, w_hid) - # # nn.dense(step, cat_w) is used - # # gates shape = (batch, 2 * hidden_size) - # rz_gates = _op.nn.dense(step, cat_w) - # # Add biases - # if b_inp is not None: - # b_irz, b_in = _op.split(b_inp, [2*hidden_size], axis=0) - # rz_gates += b_irz - # if b_hid is not None: - # b_hrz, b_hn = _op.split(b_hid, [2*hidden_size], axis=0) - # rz_gates += b_hrz - # # TODO(vvchernov): check similarity of r_act and z_act and change sequence act->split - # # any gate shape = (batch, hidden_size) - # r_gate, z_gate = _op.split(rz_gates, 2, axis=-1) - - # r_gate = r_act(r_gate) - # z_gate = z_act(z_gate) - - # ni_gate = _op.nn.dense(x_t, w_in) - # if b_inp is not None: - # ni_gate += b_in - # nh_gate = _op.nn.dense(hidden_state, w_hn) - # if b_hid is not None: - # nh_gate += b_hn - - # n_gate = n_act(ni_gate + r_gate * nh_gate) - xwt = _op.nn.dense(x_t, w_inp) i_r, i_z, i_n = _op.split(xwt, 3, axis=1) w_hr, w_hz, w_hn = _op.split(w_hid, 3, axis=0) diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index dff1ee3153a0..f616c3bf7037 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -2318,7 +2318,6 @@ def flip(self, inputs, input_types): def bidir_gru_cell( self, input_seqs, - hidden_size, weights_dicts, ): """ @@ -2327,13 +2326,11 @@ def bidir_gru_cell( seq_len = len(input_seqs) forward_outputs, fw_H_t = gru_cell( 
input_seqs, - hidden_size=hidden_size, **weights_dicts[0], ) reverse_outputs, rev_H_t = gru_cell( input_seqs, - hidden_size=hidden_size, **weights_dicts[1], backwards=True, ) @@ -2346,7 +2343,7 @@ def bidir_gru_cell( return final_outputs, _op.stack([fw_H_t, rev_H_t], axis=0) - def gru_layers(self, input_data, layer_weights_dicts, bidirectional, hidden_size, dropout_p=0.0): + def gru_layers(self, input_data, layer_weights_dicts, bidirectional, dropout_p=0.0): """ Methods iterates layers for Stacked LSTM """ @@ -2359,9 +2356,9 @@ def gru_layers(self, input_data, layer_weights_dicts, bidirectional, hidden_size # input_seqs shape = [seq_num, (batch, feature_size)] or # [seq_num, (batch, 2*feature_size)] for bidirectional if bidirectional: - input_seqs, H_t = self.bidir_gru_cell(input_seqs, hidden_size, weights_dicts) + input_seqs, H_t = self.bidir_gru_cell(input_seqs, weights_dicts) else: - input_seqs, H_t = gru_cell(input_seqs, **weights_dicts[0], hidden_size=hidden_size) + input_seqs, H_t = gru_cell(input_seqs, **weights_dicts[0]) output_hiddens.append(H_t) @@ -2377,7 +2374,8 @@ def gru_layers(self, input_data, layer_weights_dicts, bidirectional, hidden_size def gru(self, inputs, input_types): """ - Description of GRU in pytorch:https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#torch.nn.GRU + Description of GRU in pytorch: + https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#torch.nn.GRU """ # TODO (vvchernov): support dropout assert len(inputs) == 9, "Input of size 9 is expected" @@ -2430,7 +2428,7 @@ def gru(self, inputs, input_types): X_dtype = input_types[0] X_shape = _infer_shape(X) # (seq_num, batch, feature_size) - hidden_size = _infer_shape(_weights[0])[0] / 3 + hidden_size = int(_infer_shape(_weights[0])[0] / 3) batch_size = X_shape[1] # Initialize hidden states if not provided. 
@@ -2493,7 +2491,6 @@ def gru(self, inputs, input_types): X, layer_weights_dicts, bidirectional, - hidden_size=hidden_size, dropout_p=dropout_p, ) From 439d076b843f48913b109d8ed6df6c128b9689b0 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Tue, 17 Aug 2021 15:56:39 +0300 Subject: [PATCH 5/9] common GRU was additionaly updated. tuned pytorch GRU was strongly accelerated --- python/tvm/relay/frontend/common.py | 49 ++++++++++++++++------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index 517b63dca273..e0dce1e212c2 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -707,30 +707,35 @@ def gru_cell( outputs_list = [] for x_t in input_seqs if not backwards else reversed(input_seqs): xwt = _op.nn.dense(x_t, w_inp) - i_r, i_z, i_n = _op.split(xwt, 3, axis=1) - w_hr, w_hz, w_hn = _op.split(w_hid, 3, axis=0) - r_gate = i_r + _op.nn.dense(hidden_state, w_hr) - z_gate = i_z + _op.nn.dense(hidden_state, w_hz) - # TODO(vvchernov): It is assumed that both bias are or not - if b_inp is not None: - b_ir, b_iz, b_in = _op.split(b_inp, 3, axis=-1) - b_hr, b_hz, b_hn = _op.split(b_hid, 3, axis=-1) - r_gate += b_ir + b_hr - r_gate = rz_act(r_gate) - z_gate += b_iz + b_hz - if linear_before_reset: - n_gate = i_n + b_in + (r_gate * (_op.nn.dense(hidden_state, w_hn) + b_hn)) - else: - n_gate = i_n + b_in + _op.nn.dense((r_gate * hidden_state), w_hn) + b_hn + if linear_before_reset: + hwt = _op.nn.dense(hidden_state, w_hid) + # TODO(vvchernov): It is assumed that both bias are or not + if b_inp is not None: + xwt += b_inp + hwt += b_hid + i_r, i_z, i_n = _op.split(xwt, 3, axis=-1) + h_r, h_z, h_n = _op.split(hwt, 3, axis=-1) + r_gate = rz_act(i_r + h_r) + z_gate = rz_act(i_z + h_z) + n_gate = n_act(i_n + r_gate * h_n) else: - r_gate = rz_act(r_gate) - if linear_before_reset: - n_gate = i_n + (r_gate * (_op.nn.dense(hidden_state, w_hn))) + i_r, 
i_z, i_n = _op.split(xwt, 3, axis=1) + w_hr, w_hz, w_hn = _op.split(w_hid, 3, axis=0) + r_gate = i_r + _op.nn.dense(hidden_state, w_hr) + z_gate = i_z + _op.nn.dense(hidden_state, w_hz) + # TODO(vvchernov): It is assumed that both bias are or not + if b_inp is not None: + b_ir, b_iz, b_in = _op.split(b_inp, 3, axis=-1) + b_hr, b_hz, b_hn = _op.split(b_hid, 3, axis=-1) + r_gate += b_ir + b_hr + z_gate += b_iz + b_hz + i_n += b_in + h_n = _op.nn.dense((r_gate * hidden_state), w_hn) + b_hn else: - n_gate = i_n + _op.nn.dense((r_gate * hidden_state), w_hn) - - z_gate = rz_act(z_gate) - n_gate = n_act(n_gate) + h_n = _op.nn.dense((r_gate * hidden_state), w_hn) + r_gate = rz_act(r_gate) + z_gate = rz_act(z_gate) + n_gate = n_act(i_n + h_n) hidden_state = (hidden_state - n_gate) * z_gate + n_gate From 677eafbec7b810129697e014509fdf2ceca4de5f Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Wed, 18 Aug 2021 22:51:58 +0300 Subject: [PATCH 6/9] GRU cell in ONNX frontend was used from common.py. previous implementation was removed --- python/tvm/relay/frontend/onnx.py | 150 ++++++++++++++---------------- 1 file changed, 72 insertions(+), 78 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index fe3dcb6c0792..5471f67ea106 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -47,6 +47,7 @@ infer_value, new_var, unbind, + gru_cell, lstm_cell, ) @@ -2349,57 +2350,41 @@ class GRU(RNN): """Operator convert for GRU""" @classmethod - def generate_gru( - cls, X_steps, H_t, W, R, B, linear_before_reset, f_act, g_act, W_dtype, backwards=False + def bidir_gru_cell( + cls, + input_seqs, + weight_dicts, + acts, ): - """Create an unrolled gru loop. - - See https://github.com/onnx/onnx/blob/master/docs/Operators.md for math. 
""" - h_list = [] - seq_length = len(X_steps) - for i in range(seq_length): - step = X_steps[i] if not backwards else X_steps[seq_length - (i + 1)] - step = _op.squeeze(step, axis=[0]) - current = _op.nn.dense(step, W) - cz, cr, ch = _op.split(current, 3, axis=1) - rz, rr, rh = _op.split(R, 3, axis=0) - z = cz + _op.nn.dense(H_t, rz) - r = cr + _op.nn.dense(H_t, rr) - if B is not None: - WB, RB = _op.split(B, 2) - wbz, wbr, wbh = _op.split(WB, 3, axis=-1) - rbz, rbr, rbh = _op.split(RB, 3, axis=-1) - z += wbz + rbz - r += wbr + rbr - r = f_act(r) - if linear_before_reset: - h = ch + (r * (_op.nn.dense(H_t, rh) + rbh)) + wbh - else: - h = ch + _op.nn.dense((r * H_t), rh) + wbh + rbh - else: - r = f_act(r) - if linear_before_reset: - h = ch + (r * (_op.nn.dense(H_t, rh))) - else: - h = ch + _op.nn.dense((r * H_t), rh) - - z = f_act(z) - h = g_act(h) - - H_t = (H_t - h) * z + h - h_list.append(_op.expand_dims(H_t, axis=0)) + Bidirectional GRU cell + """ + seq_len = len(input_seqs) + forward_outputs, fw_H_t = gru_cell( + input_seqs, + **weight_dicts[0], + rz_act=acts[0], + n_act=acts[1], + ) - if backwards: - # Canonical view is hidden states from the first token not last - h_list = h_list[::-1] + reverse_outputs, rev_H_t = gru_cell( + input_seqs, + **weight_dicts[1], + rz_act=acts[2], + n_act=acts[3], + backwards=True, + ) - # Concatenate outputs and add back in direction axis. 
- concatenated = _op.concatenate(h_list, 0) - output = _op.expand_dims(concatenated, axis=1) - H_t = _op.expand_dims(H_t, axis=0) + final_outputs = [] + for i in range(seq_len): + final_outputs.append( + _op.stack([forward_outputs[i], reverse_outputs[seq_len - 1 - i]], axis=0) + ) - return output, H_t + return ( + _op.stack(final_outputs, axis=0), + _op.stack([fw_H_t, rev_H_t], axis=0), + ) @classmethod def _impl_v7(cls, inputs, attr, params): @@ -2417,20 +2402,14 @@ def _impl_v7(cls, inputs, attr, params): W_dtype = infer_type(Wp).checked_type.dtype if num_directions not in [1, 2]: - raise NotImplementedError( - f"Directions for GRUs should be either 1 or 2 got {num_directions}" - ) + raise ValueError("num_directions must be either 1 or 2!") X_shape = infer_shape(X) hidden_size = infer_shape(Rp)[-1] batch_size = X_shape[1] - # Initialize state if not provided. - # Otherwise remove bidirectional axis. if Hp_0 is None: Hp_0 = _op.zeros((num_directions, batch_size, hidden_size), W_dtype) - if Bp is None: - Bp = _op.zeros((num_directions, hidden_size * 6), W_dtype) if "activations" in attr: activations = attr["activations"] @@ -2461,39 +2440,54 @@ def _impl_v7(cls, inputs, attr, params): else: acts = [_op.sigmoid, _op.tanh] * 2 - result_output = [] - result_H = [] + # TODO (vvchernov): It can be replaced by _op.split if issue #8412 is resolved + X_steps = unbind(X, axis=0) - X_steps = _op.split(X, indices_or_sections=X_shape[0], axis=0) H_ts = _op.split(Hp_0, num_directions) Ws = _op.split(Wp, num_directions) Rs = _op.split(Rp, num_directions) - Bs = _op.split(Bp, num_directions) + if Bp is not None: + Bs = _op.split(Bp, num_directions) + + weights_dicts = [] for i in range(num_directions): - H_t = _op.squeeze(H_ts[i], axis=[0]) - W = _op.squeeze(Ws[i], axis=[0]) - R = _op.squeeze(Rs[i], axis=[0]) - B = _op.squeeze(Bs[i], axis=[0]) - f_act, g_act = acts[i * 2 : (i + 1) * 2] - output, H = GRU.generate_gru( - X_steps=X_steps, - H_t=H_t, - W=W, - R=R, - B=B, - 
linear_before_reset=linear_before_reset, - f_act=f_act, - g_act=g_act, - W_dtype=W_dtype, - backwards=i == 1, - ) + weights_dict = {} + + weights_dict["hidden_state"] = _op.squeeze(H_ts[i], axis=[0]) + weights_dict["linear_before_reset"] = linear_before_reset + + # Weights permutation: onnx format z-r-n, gru cell format r-z-n + matz, matr, matn = _op.split(_op.squeeze(Ws[i], axis=[0]), 3) + weights_dict["w_inp"] = _op.concatenate([matr, matz, matn], axis=0) + matz, matr, matn = _op.split(_op.squeeze(Rs[i], axis=[0]), 3) + weights_dict["w_hid"] = _op.concatenate([matr, matz, matn], axis=0) + if Bp is not None: + Bi, Bh = _op.split(Bs[i], 2, -1) + matz, matr, matn = _op.split(_op.squeeze(Bi, axis=[0]), 3) + weights_dict["b_inp"] = _op.concatenate([matr, matz, matn], axis=0) + matz, matr, matn = _op.split(_op.squeeze(Bh, axis=[0]), 3) + weights_dict["b_hid"] = _op.concatenate([matr, matz, matn], axis=0) + weights_dicts.append(weights_dict) - result_output.append(output) - result_H.append(H) + if num_directions == 2: + output, H = GRU.bidir_gru_cell( + input_seqs=X_steps, + weight_dicts=weights_dicts, + acts=acts, + ) + else: + # outputs shape = [seqs_num, (batch_size, hidden_size)] + outputs, H = gru_cell( + input_seqs=X_steps, + **weights_dicts[0], + rz_act=acts[0], + n_act=acts[1], + ) - output = _op.concatenate(result_output, axis=1) - H = _op.concatenate(result_H, axis=0) + # output shape = (seqs_num, num_directions, batch_size, hidden_size) + output = _op.expand_dims(_op.stack(outputs, axis=0), axis=1) + H = _op.expand_dims(H, axis=0) return _expr.TupleWrapper(_expr.Tuple((output, H)), 2) From 1f889a397c550b1f03abc5426bf1075638d6dbec Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 20 Aug 2021 18:44:00 +0300 Subject: [PATCH 7/9] small fixes in comments --- python/tvm/relay/frontend/pytorch.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index 
f616c3bf7037..613643f091d7 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -2345,7 +2345,7 @@ def bidir_gru_cell( def gru_layers(self, input_data, layer_weights_dicts, bidirectional, dropout_p=0.0): """ - Methods iterates layers for Stacked LSTM + Methods iterates layers for Stacked GRU """ layers_num = len(layer_weights_dicts) # split input sequence to samples set @@ -2368,7 +2368,7 @@ def gru_layers(self, input_data, layer_weights_dicts, bidirectional, dropout_p=0 if dropout_p != 0 and i < layers_num - 1: # for input in input_seqs: # input = _op.dropout(input, dropout_p) - raise NotImplementedError("Dropout for LSTM has not been supported yet!") + raise NotImplementedError("Dropout for GRU has not been supported yet!") return _op.stack(input_seqs, 0), _op.stack(output_hiddens, 0) @@ -2447,7 +2447,7 @@ def gru(self, inputs, input_types): names = ["hidden_state", "w_inp", "w_hid", "b_inp", "b_hid"] if bidirectional: rsd = len(_weights) % (2 * weights_num) - assert rsd == 0, "got an incorrect number of LSTM weights" + assert rsd == 0, "got an incorrect number of GRU weights" for i in range(0, len(_weights), 2 * weights_num): fw_tensors = [layers_h[2 * k], *_weights[i : i + 4]] fw_weights_dict = dict(zip(names, fw_tensors)) @@ -2457,7 +2457,7 @@ def gru(self, inputs, input_types): layer_weights_dicts.append([fw_weights_dict, rev_weights_dict]) k += 1 else: - assert len(_weights) % weights_num == 0, "got an incorrect number of LSTM weights" + assert len(_weights) % weights_num == 0, "got an incorrect number of GRU weights" for i in range(0, len(_weights), weights_num): fw_tensors = [layers_h[k], *_weights[i : i + 4]] fw_weights_dict = dict(zip(names, fw_tensors)) @@ -2467,7 +2467,7 @@ def gru(self, inputs, input_types): names = ["hidden_state", "w_inp", "w_hid"] if bidirectional: rsd = len(_weights) % (2 * weights_num) - assert rsd == 0, "got an incorrect number of LSTM weights" + assert rsd == 0, "got an incorrect 
number of GRU weights" for i in range(0, len(_weights), 2 * weights_num): fw_tensors = [layers_h[2 * k], *_weights[i : i + 2]] fw_weights_dict = dict(zip(names, fw_tensors)) @@ -2477,7 +2477,7 @@ def gru(self, inputs, input_types): layer_weights_dicts.append([fw_weights_dict, rev_weights_dict]) k += 1 else: - assert len(_weights) % weights_num == 0, "got an incorrect number of LSTM weights" + assert len(_weights) % weights_num == 0, "got an incorrect number of GRU weights" for i in range(0, len(_weights), weights_num): fw_tensors = [layers_h[k], *_weights[i : i + 2]] fw_weights_dict = dict(zip(names, fw_tensors)) From 0e19a6f817d40884315b99b18f1a5b3baf7e087d Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 23 Aug 2021 10:03:03 +0300 Subject: [PATCH 8/9] fixes after review. GRU test was implemented for pytorch frontend --- python/tvm/relay/frontend/common.py | 8 +- .../pytorch/{test_lstms.py => test_rnns.py} | 305 +++++++++++++++--- 2 files changed, 255 insertions(+), 58 deletions(-) rename tests/python/frontend/pytorch/{test_lstms.py => test_rnns.py} (53%) diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index e0dce1e212c2..ce048105ae8b 100755 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -672,7 +672,7 @@ def gru_cell( ): """ Common implementation of GRU cell for all frontends of TVM - TODO(vvchernov): currently it is used by pytorch. Extend for other frontends + TODO(vvchernov): currently it is used by pytorch and ONNX. 
Extend for other frontends Parameters ---------- @@ -709,8 +709,7 @@ def gru_cell( xwt = _op.nn.dense(x_t, w_inp) if linear_before_reset: hwt = _op.nn.dense(hidden_state, w_hid) - # TODO(vvchernov): It is assumed that both bias are or not - if b_inp is not None: + if b_inp is not None and b_hid is not None: xwt += b_inp hwt += b_hid i_r, i_z, i_n = _op.split(xwt, 3, axis=-1) @@ -723,8 +722,7 @@ def gru_cell( w_hr, w_hz, w_hn = _op.split(w_hid, 3, axis=0) r_gate = i_r + _op.nn.dense(hidden_state, w_hr) z_gate = i_z + _op.nn.dense(hidden_state, w_hz) - # TODO(vvchernov): It is assumed that both bias are or not - if b_inp is not None: + if b_inp is not None and b_hid is not None: b_ir, b_iz, b_in = _op.split(b_inp, 3, axis=-1) b_hr, b_hz, b_hn = _op.split(b_hid, 3, axis=-1) r_gate += b_ir + b_hr diff --git a/tests/python/frontend/pytorch/test_lstms.py b/tests/python/frontend/pytorch/test_rnns.py similarity index 53% rename from tests/python/frontend/pytorch/test_lstms.py rename to tests/python/frontend/pytorch/test_rnns.py index 967245e1ef9d..18f8e984ac38 100644 --- a/tests/python/frontend/pytorch/test_lstms.py +++ b/tests/python/frontend/pytorch/test_rnns.py @@ -22,22 +22,101 @@ import onnx import io import sys -import pytest from tvm import relay from tvm.contrib import graph_executor from torch import nn -## Model parameters -model_feature_size = 16 -model_hidden_size = 32 -model_num_layers = 2 -seqs_length = 2 +## LSTM parameters +lstm_feature_size = 16 +lstm_hidden_size = 32 +lstm_num_layers = 2 projection_size = 20 + +## GRU parameters +gru_feature_size = 8 +gru_hidden_size = 16 +gru_num_layers = 2 + +seqs_length = 2 batch_size = 2 +class GRU_Model(nn.Module): + def __init__( + self, + device, + seq_len=seqs_length, + batch_size=batch_size, + feature_size=gru_feature_size, + hidden_size=gru_hidden_size, + batch_first=False, + layer_num=1, + bidirectional=False, + use_bias=True, + rnd_weights_init=False, + ): + super().__init__() + + self.batch_first = 
batch_first + self.seqs_length = seq_len + self.batch_size = batch_size + self.feature_size = feature_size + + self.gru = nn.GRU( + input_size=self.feature_size, + hidden_size=hidden_size, + num_layers=layer_num, + bidirectional=bidirectional, + batch_first=batch_first, + bias=use_bias, + ).to(device) + + if rnd_weights_init: + self.gen_rnd_weights() + + def forward(self, input, hidden_init=None): + """ + Computes the output tensor after input inference along GRU layer. + + :param input: batch of data as a tensor of shape (seqs_length, batch_size, feature_size) or (batch_size, seqs_length, feature_size) if self.batch_first = True + :param hidden_init: initial hidden state of the GRU as a tensor of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None. + :return: the output tensor of shape (batch_size, hidden_size) + """ + out, hidden = self.gru(input, hidden_init) + + return out + + def gen_rnd_weights(self): + """ + Generate random weigths for the model with biases + For first uni- and bidirectional weights group: + Wi (3*hidden_size, feature_size) + Wh (3*hidden_size, hidden_size) + Bi (3*hidden_size) + Bh (3*hidden_size) + For other weights group: + Wi (3*hidden_size, hidden_size) + Wh (3*hidden_size, hidden_size) + Bi (3*hidden_size) + Bh (3*hidden_size) + For generation of random weigths for the model without biases the Bi and Bh weights are skipped + """ + with torch.no_grad(): + for weight_group in self.gru.all_weights: + for weight in weight_group: + weight.data = torch.rand(weight.shape) + + def get_dummy_input(self): + shape = [self.seqs_length, self.batch_size, self.feature_size] + if self.batch_first: + shape = [self.batch_size, self.seqs_length, self.feature_size] + res = torch.rand(shape) + + return res, shape + + def check_torch_version_for_proj_in_lstm(): """ proj_size parameter is supported in torch.nn.LSTM layer started from 1.8.0 torch version @@ -75,8 +154,8 @@ def __init__( if 
check_torch_version_for_proj_in_lstm(): self.lstm = nn.LSTM( - input_size=model_feature_size, - hidden_size=model_hidden_size, + input_size=lstm_feature_size, + hidden_size=lstm_hidden_size, num_layers=layer_num, bidirectional=bidirectional, proj_size=proj_size, @@ -91,8 +170,8 @@ def __init__( ) # sys.exit() self.lstm = nn.LSTM( - input_size=model_feature_size, - hidden_size=model_hidden_size, + input_size=lstm_feature_size, + hidden_size=lstm_hidden_size, num_layers=layer_num, bidirectional=bidirectional, batch_first=batch_first, @@ -106,9 +185,9 @@ def forward(self, input, hidden_init=None): """ Computes the output tensor after input inference along LSTM layer. - :param input: batch of data as a tensor of shape (seqs_length, batch_size, model_feature_size) or (batch_size, seqs_length, model_feature_size) if self.batch_first = True + :param input: batch of data as a tensor of shape (seqs_length, batch_size, lstm_feature_size) or (batch_size, seqs_length, lstm_feature_size) if self.batch_first = True :param hidden_init: initial hidden state of the LSTM as a tensor of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None. - :return: the output tensor of shape (batch_size, model_hidden_size) + :return: the output tensor of shape (batch_size, lstm_hidden_size) """ # Pass the input through the LSTM layers and retrieve all outputs, the final hidden state # and the final cell state. 
@@ -121,49 +200,50 @@ def gen_rnd_weights(self): Generate random weigths for the model with biases Without projection: For first weights group: - Wi (4*model_hidden_size, model_feature_size) - Wh (4*model_hidden_size, model_hidden_size) - Bi (4*model_hidden_size) - Bh (4*model_hidden_size) + Wi (4*lstm_hidden_size, lstm_feature_size) + Wh (4*lstm_hidden_size, lstm_hidden_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) For first bidirectional weights group: - Wi (4*model_hidden_size, model_feature_size) - Wh (4*model_hidden_size, model_hidden_size) - Bi (4*model_hidden_size) - Bh (4*model_hidden_size) + Wi (4*lstm_hidden_size, lstm_feature_size) + Wh (4*lstm_hidden_size, lstm_hidden_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) For other weights group: - Wi (4*model_hidden_size, model_hidden_size) - Wh (4*model_hidden_size, model_hidden_size) - Bi (4*model_hidden_size) - Bh (4*model_hidden_size) + Wi (4*lstm_hidden_size, lstm_hidden_size) + Wh (4*lstm_hidden_size, lstm_hidden_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) With projection: For first weights group: - Wi (4*model_hidden_size, model_feature_size) - Wh (4*model_hidden_size, proj_size) - Bi (4*model_hidden_size) - Bh (4*model_hidden_size) - P (proj_size, model_hidden_size) + Wi (4*lstm_hidden_size, lstm_feature_size) + Wh (4*lstm_hidden_size, proj_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) + P (proj_size, lstm_hidden_size) For first bidirectional weights group: - Wi (4*model_hidden_size, model_feature_size) - Wh (4*model_hidden_size, proj_size) - Bi (4*model_hidden_size) - Bh (4*model_hidden_size) - P (proj_size, model_hidden_size) + Wi (4*lstm_hidden_size, lstm_feature_size) + Wh (4*lstm_hidden_size, proj_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) + P (proj_size, lstm_hidden_size) For other weights group: - Wi (4*model_hidden_size, proj_size * num_directions) - Wh (4*model_hidden_size, proj_size) - Bi (4*model_hidden_size) - Bh 
(4*model_hidden_size) - P (proj_size, model_hidden_size) + Wi (4*lstm_hidden_size, proj_size * num_directions) + Wh (4*lstm_hidden_size, proj_size) + Bi (4*lstm_hidden_size) + Bh (4*lstm_hidden_size) + P (proj_size, lstm_hidden_size) For generation of random weigths for the model without biases Bi and Bh are skipped """ - for weight_group in self.lstm.all_weights: - for weight in weight_group: - weight.data = torch.rand(weight.shape) + with torch.no_grad(): + for weight_group in self.lstm.all_weights: + for weight in weight_group: + weight.data = torch.rand(weight.shape) def get_dummy_input(self): - shape = [seqs_length, batch_size, model_feature_size] + shape = [seqs_length, batch_size, lstm_feature_size] if self.batch_first: - shape = [batch_size, seqs_length, model_feature_size] + shape = [batch_size, seqs_length, lstm_feature_size] res = torch.rand(shape) return res, shape @@ -173,6 +253,115 @@ def compare(input, gold_data, rtol=1e-5, atol=1e-5): tvm.testing.assert_allclose(input, gold_data, rtol=rtol, atol=atol) +def check_gru_with_type(gru_type, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0)): + device = torch.device("cpu") + hidden_layers_num = 1 + model = None + for batch_first in (True, False): + for use_bias in (True, False): + for rnd_weights in [True]: # (True, False): + if gru_type == "uni": + model = GRU_Model( + device, + batch_first=batch_first, + rnd_weights_init=rnd_weights, + use_bias=use_bias, + ) + elif gru_type == "b": + model = GRU_Model( + device, + batch_first=batch_first, + bidirectional=True, + rnd_weights_init=rnd_weights, + use_bias=use_bias, + ) + hidden_layers_num = 2 + elif gru_type == "s": + model = GRU_Model( + device, + batch_first=batch_first, + layer_num=gru_num_layers, + rnd_weights_init=rnd_weights, + use_bias=use_bias, + ) + hidden_layers_num = gru_num_layers + elif gru_type == "sb": + model = GRU_Model( + device, + batch_first=batch_first, + bidirectional=True, + layer_num=gru_num_layers, + 
rnd_weights_init=rnd_weights, + use_bias=use_bias, + ) + hidden_layers_num = 2 * gru_num_layers + else: + print("WARNING: GRU type {} is not supported here!".format(gru_type)) + return + + model.eval() + + # Get golden output from original model + input_hidden_shape = (hidden_layers_num, batch_size, gru_hidden_size) + dummy_input, input_shape = model.get_dummy_input() + golden_output_batch = model.forward(dummy_input.to(device)).detach().cpu().numpy() + + dtype = "float32" + h_zeros = np.zeros(input_hidden_shape, dtype=dtype) + + tvm_output = None + for format in ["ts"]: # ["ts", "onnx"]: + if format == "ts": + # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing. + traced_script_module = torch.jit.trace(model, dummy_input).eval() + + # Import model to Relay + shape_list = [("input", input_shape)] + mod, params = relay.frontend.from_pytorch(traced_script_module, shape_list) + + # Model compilation by tvm + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target=target, params=params) + elif format == "onnx": + onnx_io = io.BytesIO() + with torch.no_grad(): + h0 = torch.rand(input_hidden_shape) + input_names = ["input", "h0"] + + # default export (without dynamic input) + torch.onnx.export( + model, (dummy_input, h0), onnx_io, input_names=input_names + ) + onnx_io.seek(0, 0) + onnx_model = onnx.load_model(onnx_io) + + # Import model to Relay + shape_dict = { + "input": input_shape, + "h0": input_hidden_shape, + } + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) + + # Model compilation by tvm + with tvm.transform.PassContext(opt_level=1): + lib = relay.build(mod, target=target, params=params) + + # Inference of the model with given input data + m = graph_executor.GraphModule(lib["default"](dev)) + + # Set inputs + m.set_input( + input=tvm.nd.array(dummy_input.numpy().astype(dtype)), + h0=tvm.nd.array(h_zeros), + ) + # Execute + m.run() + # Get outputs (converted to numpy array) + tvm_output = 
m.get_output(0).numpy() + + compare(tvm_output, golden_output_batch) + + def check_lstm_with_type( lstm_type, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0) ): @@ -212,31 +401,31 @@ def check_lstm_with_type( model = LSTM_Model( device, batch_first=batch_first, - layer_num=model_num_layers, + layer_num=lstm_num_layers, rnd_weights_init=rnd_weights, use_bias=use_bias, ) - hidden_layers_num = model_num_layers + hidden_layers_num = lstm_num_layers elif lstm_type == "sb": model = LSTM_Model( device, batch_first=batch_first, bidirectional=True, - layer_num=model_num_layers, + layer_num=lstm_num_layers, rnd_weights_init=rnd_weights, use_bias=use_bias, ) - hidden_layers_num = 2 * model_num_layers + hidden_layers_num = 2 * lstm_num_layers elif lstm_type == "sp": model = LSTM_Model( device, batch_first=batch_first, - layer_num=model_num_layers, + layer_num=lstm_num_layers, proj_size=projection_size, rnd_weights_init=rnd_weights, use_bias=use_bias, ) - hidden_layers_num = model_num_layers + hidden_layers_num = lstm_num_layers elif lstm_type == "bp": model = LSTM_Model( device, @@ -252,12 +441,12 @@ def check_lstm_with_type( device, batch_first=batch_first, bidirectional=True, - layer_num=model_num_layers, + layer_num=lstm_num_layers, proj_size=projection_size, rnd_weights_init=rnd_weights, use_bias=use_bias, ) - hidden_layers_num = 2 * model_num_layers + hidden_layers_num = 2 * lstm_num_layers else: print("WARNING: LSTM type {} is not supported here!".format(lstm_type)) return @@ -265,7 +454,7 @@ def check_lstm_with_type( model.eval() # Get golden output from original model - input_hidden_shape = (hidden_layers_num, batch_size, model_hidden_size) + input_hidden_shape = (hidden_layers_num, batch_size, lstm_hidden_size) input_hidden_shape_with_proj = (hidden_layers_num, batch_size, projection_size) dummy_input, input_shape = model.get_dummy_input() golden_output_batch = model.forward(dummy_input.to(device)).detach().cpu().numpy() @@ -346,6 +535,15 @@ def 
check_lstm_with_type( compare(tvm_output, golden_output_batch) +@tvm.testing.uses_gpu +def test_grus(): + for target, dev in tvm.testing.enabled_targets(): + check_gru_with_type("uni", target, dev) + check_gru_with_type("s", target, dev) + check_gru_with_type("b", target, dev) + check_gru_with_type("sb", target, dev) + + @tvm.testing.uses_gpu def test_lstms(): for target, dev in tvm.testing.enabled_targets(): @@ -361,3 +559,4 @@ def test_lstms(): if __name__ == "__main__": test_lstms() + test_grus() From c722e27739d77058915a18e42764852fd7a4e1fa Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Tue, 24 Aug 2021 12:36:55 +0300 Subject: [PATCH 9/9] tests for RNN layers was unified for pytorch frontend --- tests/python/frontend/pytorch/test_rnns.py | 544 ++++++++------------- 1 file changed, 206 insertions(+), 338 deletions(-) diff --git a/tests/python/frontend/pytorch/test_rnns.py b/tests/python/frontend/pytorch/test_rnns.py index 18f8e984ac38..b5784a6fe1e1 100644 --- a/tests/python/frontend/pytorch/test_rnns.py +++ b/tests/python/frontend/pytorch/test_rnns.py @@ -17,7 +17,6 @@ import tvm import tvm.testing -import numpy as np import torch import onnx import io @@ -31,22 +30,72 @@ ## LSTM parameters lstm_feature_size = 16 lstm_hidden_size = 32 -lstm_num_layers = 2 -projection_size = 20 +lstm_projection_size = 20 ## GRU parameters gru_feature_size = 8 gru_hidden_size = 16 -gru_num_layers = 2 +num_layers = 2 seqs_length = 2 batch_size = 2 -class GRU_Model(nn.Module): +class RNN_Model(nn.Module): + """ + It is base class for RNN layer classes. + It contains some common fields and methods for child classes. + """ + + def __init__( + self, + ): + super().__init__() + + # model is defined in child class + self.model = None + + def forward(self, input, hidden_init=None): + """ + Computes the output tensor after input inference along RNN layer. 
+ + :param input: batch of data as a tensor of shape (seqs_length, batch_size, feature_size) or (batch_size, seqs_length, feature_size) if self.batch_first = True + :param hidden_init: initial hidden state(s) of the RNN as a tensor(s) of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None. + :return: the output tensor of shape (batch_size, hidden_size) + """ + if self.model is None: + raise NotImplementedError("self.model must be defined in subclasses!") + out, _ = self.model(input, hidden_init) + + return out + + def gen_rnd_weights(self): + """ + Generate random weights for the model + """ + if self.model is None: + raise NotImplementedError("self.model must be defined in subclasses!") + with torch.no_grad(): + for weight_group in self.model.all_weights: + for weight in weight_group: + weight.data = torch.rand(weight.shape) + + def get_dummy_inputs(self): + raise NotImplementedError("subclasses must override get_dummy_inputs()!") + + def get_input_names(self): + raise NotImplementedError("subclasses must override get_input_names()!") + + def get_shape_desc(self, frontend_type): + raise NotImplementedError("subclasses must override get_shape_desc(frontend_type)!") + + def get_tvm_inputs(self, dtype): + raise NotImplementedError("subclasses must override get_tvm_inputs(dtype)!") + + +class GRU_Model(RNN_Model): def __init__( self, - device, seq_len=seqs_length, batch_size=batch_size, feature_size=gru_feature_size, @@ -59,35 +108,27 @@ def __init__( ): super().__init__() - self.batch_first = batch_first - self.seqs_length = seq_len - self.batch_size = batch_size - self.feature_size = feature_size - - self.gru = nn.GRU( - input_size=self.feature_size, + # Shapes + self.shape = [seq_len, batch_size, feature_size] + if batch_first: + self.shape = [batch_size, seq_len, feature_size] + layers_num = 2 * layer_num if bidirectional else layer_num + self.h0_shape = [layers_num, batch_size, hidden_size] + # Dummy inputs + self.dummy_inputs = 
(torch.rand(self.shape), torch.zeros(self.h0_shape)) + + self.model = nn.GRU( + input_size=feature_size, hidden_size=hidden_size, num_layers=layer_num, bidirectional=bidirectional, batch_first=batch_first, bias=use_bias, - ).to(device) + ) if rnd_weights_init: self.gen_rnd_weights() - def forward(self, input, hidden_init=None): - """ - Computes the output tensor after input inference along GRU layer. - - :param input: batch of data as a tensor of shape (seqs_length, batch_size, feature_size) or (batch_size, seqs_length, feature_size) if self.batch_first = True - :param hidden_init: initial hidden state of the GRU as a tensor of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None. - :return: the output tensor of shape (batch_size, hidden_size) - """ - out, hidden = self.gru(input, hidden_init) - - return out - def gen_rnd_weights(self): """ Generate random weigths for the model with biases @@ -103,18 +144,30 @@ def gen_rnd_weights(self): Bh (3*hidden_size) For generation of random weigths for the model without biases the Bi and Bh weights are skipped """ - with torch.no_grad(): - for weight_group in self.gru.all_weights: - for weight in weight_group: - weight.data = torch.rand(weight.shape) + super().gen_rnd_weights() - def get_dummy_input(self): - shape = [self.seqs_length, self.batch_size, self.feature_size] - if self.batch_first: - shape = [self.batch_size, self.seqs_length, self.feature_size] - res = torch.rand(shape) + def get_dummy_inputs(self): + return self.dummy_inputs - return res, shape + def get_input_names(self): + return ["input", "h0"] + + def get_shape_desc(self, frontend_type): + shape_desc = None + if frontend_type == "pt": # PyTorch + shape_desc = [("input", self.shape)] + elif frontend_type == "onnx": # ONNX + shape_desc = { + "input": self.shape, + "h0": self.h0_shape, + } + return shape_desc + + def get_tvm_inputs(self, dtype): + return { + "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)), + 
"h0": tvm.nd.array(self.dummy_inputs[1].numpy().astype(dtype)), + } def check_torch_version_for_proj_in_lstm(): @@ -135,10 +188,13 @@ def check_torch_version_for_proj_in_lstm(): return me -class LSTM_Model(nn.Module): +class LSTM_Model(RNN_Model): def __init__( self, - device, + seq_len=seqs_length, + batch_size=batch_size, + feature_size=lstm_feature_size, + hidden_size=lstm_hidden_size, batch_first=False, layer_num=1, bidirectional=False, @@ -148,12 +204,23 @@ def __init__( ): super().__init__() - self.device = device - self.batch_first = batch_first - self.use_bias = use_bias + # Shapes + self.shape = [seq_len, batch_size, feature_size] + if batch_first: + self.shape = [batch_size, seq_len, feature_size] + layers_num = 2 * layer_num if bidirectional else layer_num + self.h0_shape = [layers_num, batch_size, hidden_size] + if proj_size > 0: + self.h0_shape = [layers_num, batch_size, proj_size] + self.c0_shape = [layers_num, batch_size, hidden_size] + # Dummy inputs + self.dummy_inputs = ( + torch.rand(self.shape), + (torch.zeros(self.h0_shape), torch.zeros(self.c0_shape)), + ) if check_torch_version_for_proj_in_lstm(): - self.lstm = nn.LSTM( + self.model = nn.LSTM( input_size=lstm_feature_size, hidden_size=lstm_hidden_size, num_layers=layer_num, @@ -161,7 +228,7 @@ def __init__( proj_size=proj_size, batch_first=batch_first, bias=use_bias, - ).to(device) + ) else: if proj_size > 0: print( @@ -169,32 +236,18 @@ def __init__( "LSTM was constructed without projection!", ) # sys.exit() - self.lstm = nn.LSTM( + self.model = nn.LSTM( input_size=lstm_feature_size, hidden_size=lstm_hidden_size, num_layers=layer_num, bidirectional=bidirectional, batch_first=batch_first, bias=use_bias, - ).to(device) + ) if rnd_weights_init: self.gen_rnd_weights() - def forward(self, input, hidden_init=None): - """ - Computes the output tensor after input inference along LSTM layer. 
- - :param input: batch of data as a tensor of shape (seqs_length, batch_size, lstm_feature_size) or (batch_size, seqs_length, lstm_feature_size) if self.batch_first = True - :param hidden_init: initial hidden state of the LSTM as a tensor of shape (num_layers, batch_size, hidden_size). Will default to a tensor of zeros if None. - :return: the output tensor of shape (batch_size, lstm_hidden_size) - """ - # Pass the input through the LSTM layers and retrieve all outputs, the final hidden state - # and the final cell state. - out, (hidden, cell) = self.lstm(input, hidden_init) - - return out - def gen_rnd_weights(self): """ Generate random weigths for the model with biases @@ -235,328 +288,143 @@ def gen_rnd_weights(self): P (proj_size, lstm_hidden_size) For generation of random weigths for the model without biases Bi and Bh are skipped """ - with torch.no_grad(): - for weight_group in self.lstm.all_weights: - for weight in weight_group: - weight.data = torch.rand(weight.shape) - - def get_dummy_input(self): - shape = [seqs_length, batch_size, lstm_feature_size] - if self.batch_first: - shape = [batch_size, seqs_length, lstm_feature_size] - res = torch.rand(shape) - - return res, shape + super().gen_rnd_weights() + + def get_dummy_inputs(self): + return self.dummy_inputs + + def get_input_names(self): + return ["input", "h0", "c0"] + + def get_shape_desc(self, frontend_type): + shape_desc = None + if frontend_type == "pt": # PyTorch + shape_desc = [("input", self.shape)] + elif frontend_type == "onnx": # ONNX + shape_desc = { + "input": self.shape, + "h0": self.h0_shape, + "c0": self.c0_shape, + } + return shape_desc + + def get_tvm_inputs(self, dtype): + return { + "input": tvm.nd.array(self.dummy_inputs[0].numpy().astype(dtype)), + "h0": tvm.nd.array(self.dummy_inputs[1][0].numpy().astype(dtype)), + "c0": tvm.nd.array(self.dummy_inputs[1][1].numpy().astype(dtype)), + } def compare(input, gold_data, rtol=1e-5, atol=1e-5): tvm.testing.assert_allclose(input, 
gold_data, rtol=rtol, atol=atol) -def check_gru_with_type(gru_type, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0)): - device = torch.device("cpu") - hidden_layers_num = 1 - model = None - for batch_first in (True, False): - for use_bias in (True, False): - for rnd_weights in [True]: # (True, False): - if gru_type == "uni": - model = GRU_Model( - device, - batch_first=batch_first, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - elif gru_type == "b": - model = GRU_Model( - device, - batch_first=batch_first, - bidirectional=True, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 - elif gru_type == "s": - model = GRU_Model( - device, - batch_first=batch_first, - layer_num=gru_num_layers, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = gru_num_layers - elif gru_type == "sb": - model = GRU_Model( - device, - batch_first=batch_first, - bidirectional=True, - layer_num=gru_num_layers, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 * gru_num_layers - else: - print("WARNING: GRU type {} is not supported here!".format(gru_type)) - return - - model.eval() - - # Get golden output from original model - input_hidden_shape = (hidden_layers_num, batch_size, gru_hidden_size) - dummy_input, input_shape = model.get_dummy_input() - golden_output_batch = model.forward(dummy_input.to(device)).detach().cpu().numpy() - - dtype = "float32" - h_zeros = np.zeros(input_hidden_shape, dtype=dtype) - - tvm_output = None - for format in ["ts"]: # ["ts", "onnx"]: - if format == "ts": - # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing. 
- traced_script_module = torch.jit.trace(model, dummy_input).eval() - - # Import model to Relay - shape_list = [("input", input_shape)] - mod, params = relay.frontend.from_pytorch(traced_script_module, shape_list) - - # Model compilation by tvm - with tvm.transform.PassContext(opt_level=3): - lib = relay.build(mod, target=target, params=params) - elif format == "onnx": - onnx_io = io.BytesIO() - with torch.no_grad(): - h0 = torch.rand(input_hidden_shape) - input_names = ["input", "h0"] - - # default export (without dynamic input) - torch.onnx.export( - model, (dummy_input, h0), onnx_io, input_names=input_names - ) - onnx_io.seek(0, 0) - onnx_model = onnx.load_model(onnx_io) - - # Import model to Relay - shape_dict = { - "input": input_shape, - "h0": input_hidden_shape, - } - mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) - - # Model compilation by tvm - with tvm.transform.PassContext(opt_level=1): - lib = relay.build(mod, target=target, params=params) - - # Inference of the model with given input data - m = graph_executor.GraphModule(lib["default"](dev)) - - # Set inputs - m.set_input( - input=tvm.nd.array(dummy_input.numpy().astype(dtype)), - h0=tvm.nd.array(h_zeros), - ) - # Execute - m.run() - # Get outputs (converted to numpy array) - tvm_output = m.get_output(0).numpy() - - compare(tvm_output, golden_output_batch) +def check_rnn(rnn_type, rnn_mod, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0)): + def get_model( + rnn_type, + rnn_mod, + args, + ): + # Fill args + if "b" in rnn_mod: + args["bidirectional"] = True + if "s" in rnn_mod: + args["layer_num"] = num_layers + + if rnn_type == "GRU": + RNN_Model_selector = GRU_Model + elif rnn_type == "LSTM": + RNN_Model_selector = LSTM_Model + if "p" in rnn_mod: + args["proj_size"] = lstm_projection_size + + return RNN_Model_selector(**args) + + def get_onnx_model(model): + onnx_io = io.BytesIO() + with torch.no_grad(): + input_names = model.get_input_names() + inputs = 
model.get_dummy_inputs() + # default export (without dynamic input) + torch.onnx.export(model, inputs, onnx_io, input_names=input_names) -def check_lstm_with_type( - lstm_type, target=tvm.target.Target("llvm -mcpu=core-avx2"), dev=tvm.cpu(0) -): - has_proj = "p" in lstm_type + onnx_io.seek(0, 0) + return onnx.load_model(onnx_io) - device = torch.device("cpu") - hidden_layers_num = 1 model = None + dtype = "float32" + device = torch.device("cpu") for batch_first in (True, False): for use_bias in (True, False): for rnd_weights in [True]: # (True, False): - if lstm_type == "uni": - model = LSTM_Model( - device, - batch_first=batch_first, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - elif lstm_type == "b": - model = LSTM_Model( - device, - batch_first=batch_first, - bidirectional=True, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 - elif lstm_type == "p": - model = LSTM_Model( - device, - batch_first=batch_first, - proj_size=projection_size, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - elif lstm_type == "s": - model = LSTM_Model( - device, - batch_first=batch_first, - layer_num=lstm_num_layers, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = lstm_num_layers - elif lstm_type == "sb": - model = LSTM_Model( - device, - batch_first=batch_first, - bidirectional=True, - layer_num=lstm_num_layers, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 * lstm_num_layers - elif lstm_type == "sp": - model = LSTM_Model( - device, - batch_first=batch_first, - layer_num=lstm_num_layers, - proj_size=projection_size, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = lstm_num_layers - elif lstm_type == "bp": - model = LSTM_Model( - device, - batch_first=batch_first, - bidirectional=True, - proj_size=projection_size, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 - elif lstm_type == "sbp": - model = 
LSTM_Model( - device, - batch_first=batch_first, - bidirectional=True, - layer_num=lstm_num_layers, - proj_size=projection_size, - rnd_weights_init=rnd_weights, - use_bias=use_bias, - ) - hidden_layers_num = 2 * lstm_num_layers - else: - print("WARNING: LSTM type {} is not supported here!".format(lstm_type)) - return - + model_inputs = { + "batch_first": batch_first, + "use_bias": use_bias, + "rnd_weights_init": rnd_weights, + } + model = get_model(rnn_type, rnn_mod, model_inputs) + model.to(device) model.eval() # Get golden output from original model - input_hidden_shape = (hidden_layers_num, batch_size, lstm_hidden_size) - input_hidden_shape_with_proj = (hidden_layers_num, batch_size, projection_size) - dummy_input, input_shape = model.get_dummy_input() - golden_output_batch = model.forward(dummy_input.to(device)).detach().cpu().numpy() - - dtype = "float32" - h_zeros = np.zeros(input_hidden_shape, dtype=dtype) - if has_proj: - h_zeros = np.zeros(input_hidden_shape_with_proj, dtype=dtype) - c_zeros = np.zeros(input_hidden_shape, dtype=dtype) + dummy_inputs = model.get_dummy_inputs() + golden_output = model.forward(dummy_inputs[0].to(device)).detach().cpu().numpy() tvm_output = None - for format in ["ts"]: # ["ts", "onnx"]: - if format == "ts": + for format in ["pt"]: # ["pt", "onnx"]: + shape_desc = model.get_shape_desc(format) + if format == "pt": # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing. 
- traced_script_module = torch.jit.trace(model, dummy_input).eval() + traced_script_module = torch.jit.trace(model, dummy_inputs[0]).eval() # Import model to Relay - shape_list = [("input", input_shape)] - mod, params = relay.frontend.from_pytorch(traced_script_module, shape_list) - - # Model compilation by tvm - with tvm.transform.PassContext(opt_level=3): - lib = relay.build(mod, target=target, params=params) + mod, params = relay.frontend.from_pytorch(traced_script_module, shape_desc) elif format == "onnx": - if has_proj: + try: + onnx_model = get_onnx_model(model) + except: print( "WARNING: torch.onnx.export does not support conversion LSTM with projection " "from pytorch! TODO: waiting for the support and correct test after that." ) continue - onnx_io = io.BytesIO() - with torch.no_grad(): - h0 = torch.rand(input_hidden_shape) - if has_proj: - h0 = torch.rand(input_hidden_shape_with_proj) - c0 = torch.rand(input_hidden_shape) - input_names = ["input", "h0", "c0"] - - # default export (without dynamic input) - torch.onnx.export( - model, (dummy_input, (h0, c0)), onnx_io, input_names=input_names - ) - onnx_io.seek(0, 0) - onnx_model = onnx.load_model(onnx_io) # Import model to Relay - shape_dict = { - "input": input_shape, - "h0": input_hidden_shape, - "c0": input_hidden_shape, - } - if has_proj: - shape_dict = { - "input": input_shape, - "h0": input_hidden_shape_with_proj, - "c0": input_hidden_shape, - } - mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) - - # Model compilation by tvm - with tvm.transform.PassContext(opt_level=1): - lib = relay.build(mod, target=target, params=params) + mod, params = relay.frontend.from_onnx(onnx_model, shape_desc) + + # Model compilation by tvm + with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target=target, params=params) # Inference of the model with given input data m = graph_executor.GraphModule(lib["default"](dev)) # Set inputs - m.set_input( - 
input=tvm.nd.array(dummy_input.numpy().astype(dtype)), - h0=tvm.nd.array(h_zeros), - c0=tvm.nd.array(c_zeros), - ) + tvm_inputs = model.get_tvm_inputs(dtype) + m.set_input(**tvm_inputs) # Execute m.run() # Get outputs (converted to numpy array) tvm_output = m.get_output(0).numpy() - compare(tvm_output, golden_output_batch) - - -@tvm.testing.uses_gpu -def test_grus(): - for target, dev in tvm.testing.enabled_targets(): - check_gru_with_type("uni", target, dev) - check_gru_with_type("s", target, dev) - check_gru_with_type("b", target, dev) - check_gru_with_type("sb", target, dev) + compare(tvm_output, golden_output) @tvm.testing.uses_gpu -def test_lstms(): +def test_rnns(): for target, dev in tvm.testing.enabled_targets(): - check_lstm_with_type("uni", target, dev) - # check_lstm_with_type("p", target, dev) - check_lstm_with_type("s", target, dev) - check_lstm_with_type("b", target, dev) - # check_lstm_with_type("bp", target, dev) - # check_lstm_with_type("sp", target, dev) - check_lstm_with_type("sb", target, dev) - # check_lstm_with_type("sbp", target, dev) + # RNN types: GRU, LSTM + # GRU modifications: unidirectional, stacked, bidirectional, stacked bidirectional + for mod_type in ["uni", "s", "b", "sb"]: + check_rnn("GRU", mod_type, target, dev) + # LSTM modifications: unidirectional, stacked, bidirectional, stacked bidirectional, + # and all these types with projection ("p", "sp", "bp", "sbp") + # The latter are skipped for test acceleration + for mod_type in ["uni", "s", "b", "sb"]: + check_rnn("LSTM", mod_type, target, dev) if __name__ == "__main__": - test_lstms() - test_grus() + test_rnns()