45 changes: 33 additions & 12 deletions nnvm/python/nnvm/frontend/keras.py
@@ -131,6 +131,14 @@ def _convert_dense(insym, keras_layer, symtab):
if keras_layer.use_bias:
params['use_bias'] = True
params['bias'] = symtab.new_const(weightList[1])
input_shape = keras_layer.input_shape
input_dim = len(input_shape)
# In case of RNN dense, input shape will be (1, 1, n)
Contributor:
The current version doesn't have a check of input_shape[1] != 1, so the input shape will be (1, m, n)?

if input_dim > 2:
input_shape = tuple(dim if dim else 1 for dim in _as_list(input_shape)[0])
if input_dim != 3 or input_shape[0] != 1 or input_shape[1] != 1:
raise ValueError("Cannot flatten the inputs with shape", input_shape, "for dense.")
insym = _sym.squeeze(insym, axis=0)
out = _sym.dense(data=insym, **params)
# defuse activation
if sys.version_info.major < 3:
@@ -139,6 +147,8 @@ def _convert_dense(insym, keras_layer, symtab):
act_type = keras_layer.activation.__name__
if act_type != 'linear':
out = _convert_activation(out, act_type, symtab)
if input_dim > 2:
out = _sym.expand_dims(out, axis=0)
return out
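
Note (not part of the diff): a minimal NumPy sketch of the shape flow this handling relies on for a (1, 1, n) RNN-style input to Dense; the sizes and names below are illustrative only.

import numpy as np

x = np.zeros((1, 1, 20))            # 3-D input to Dense, shape (1, 1, n)
x2d = np.squeeze(x, axis=0)         # -> (1, 20), mirrors _sym.squeeze(insym, axis=0)
w = np.zeros((10, 20))              # dense weight laid out as (units, n)
y2d = x2d.dot(w.T)                  # -> (1, 10), the 2-D dense result
y = np.expand_dims(y2d, axis=0)     # -> (1, 1, 10), mirrors _sym.expand_dims(out, axis=0)
print(y.shape)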


@@ -408,27 +418,32 @@ def _convert_lstm(insym, keras_layer, symtab):
insym = [insym, h_sym, c_sym]

in_data = insym[0]
in_state_h = insym[1]
in_state_c = insym[2]
next_h = insym[1]
next_c = insym[2]

weightList = keras_layer.get_weights()
inp_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.input_shape)[0])

kernel_wt = symtab.new_const(weightList[0].transpose([1, 0]))
recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0]))
in_bias = symtab.new_const(weightList[2])

units = list(weightList[0].shape)[1]

in_data = _sym.flatten(in_data)
ixh1 = _sym.dense(in_data, kernel_wt, use_bias=False, units=units)
ixh2 = _sym.dense(in_state_h, recurrent_wt, in_bias, use_bias=True, units=units)
gate = ixh1 + ixh2
gates = _sym.split(gate, indices_or_sections=4, axis=1)
in_gate = _convert_recurrent_activation(gates[0], keras_layer)
in_transform = _convert_recurrent_activation(gates[1], keras_layer)
next_c = in_transform * in_state_c + in_gate * _convert_activation(gates[2], keras_layer, None)
out_gate = _convert_recurrent_activation(gates[3], keras_layer)
next_h = out_gate * _convert_activation(next_c, keras_layer, None)
time_steps = inp_shape[1]
in_data = _sym.squeeze(in_data, axis=0)
in_data = _sym.split(in_data, indices_or_sections=time_steps, axis=0)
# Loop over the number of time steps
for data in in_data:
ixh1 = _sym.dense(data, kernel_wt, use_bias=False, units=units)
ixh2 = _sym.dense(next_h, recurrent_wt, in_bias, use_bias=True, units=units)
gate = ixh1 + ixh2
gates = _sym.split(gate, indices_or_sections=4, axis=1)
in_gate = _convert_recurrent_activation(gates[0], keras_layer)
in_transform = _convert_recurrent_activation(gates[1], keras_layer)
next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None)
out_gate = _convert_recurrent_activation(gates[3], keras_layer)
next_h = out_gate * _convert_activation(next_c, keras_layer, None)

out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
out = _sym.reshape(next_h, shape=out_shape)
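
Note (not part of the diff): a minimal NumPy sketch of the per-timestep update that the unrolled loop above computes, assuming the Keras gate order (input, forget, cell, output) and sigmoid/tanh activations; all names below are illustrative only.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, kernel, recurrent, bias):
    # kernel: (n_in, 4*units), recurrent: (units, 4*units), bias: (4*units,)
    z = x_t.dot(kernel) + h_prev.dot(recurrent) + bias
    i, f, g, o = np.split(z, 4, axis=-1)   # same 4-way split as _sym.split(gate, 4, axis=1)
    c_next = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h_next = sigmoid(o) * np.tanh(c_next)
    return h_next, c_next

# Unrolled over time, as the converter does after splitting the input on the time axis:
# for t in range(time_steps):
#     h, c = lstm_step(x[:, t, :], h, c, kernel, recurrent, bias)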
@@ -656,6 +671,12 @@ def from_keras(model):
raise TypeError("Unknown layer type or unsupported Keras version : {}"
.format(keras_layer))
for node_idx, node in enumerate(inbound_nodes):
# If some nodes in the imported model are not relevant to the current model,
# skip such layers. model._network_nodes contains the keys of all nodes
# relevant to the current model.
if not model._node_key(keras_layer, node_idx) in model._network_nodes:
continue

insym = []

# Since Keras allows creating multiple layers from the same name instance,
20 changes: 15 additions & 5 deletions nnvm/tests/python/frontend/keras/test_forward.py
@@ -74,14 +74,23 @@ def test_forward_elemwise_add():
verify_keras_frontend(keras_model)


def test_forward_dense():
def _test_forward_dense():
data = keras.layers.Input(shape=(32,32,1))
x = keras.layers.Flatten()(data)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(x)
keras_model = keras.models.Model(data, x)
verify_keras_frontend(keras_model)

def _test_forward_dense_with_3d_inp():
data = keras.layers.Input(shape=(1, 20))
x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(data)
keras_model = keras.models.Model(data, x)
verify_keras_frontend(keras_model, need_transpose=False)

def test_forward_dense():
_test_forward_dense()
_test_forward_dense_with_3d_inp()

def test_forward_pool():
data = keras.layers.Input(shape=(32,32,1))
@@ -226,8 +235,8 @@ def test_forward_reuse_layers():
keras_model = keras.models.Model(data, z)
verify_keras_frontend(keras_model)

def _test_LSTM(inputs, hidden, return_state=True):
data = keras.layers.Input(shape=(1, inputs))
def _test_LSTM(time_steps, inputs, hidden, return_state=True):
data = keras.layers.Input(shape=(time_steps, inputs))
lstm_out = keras.layers.LSTM(hidden,
return_state=return_state,
recurrent_activation='sigmoid',
@@ -250,8 +259,9 @@ def _test_LSTM_MultiLayer(inputs, hidden):


def test_forward_LSTM():
_test_LSTM(8, 8, return_state=True)
_test_LSTM(4, 4, return_state=False)
_test_LSTM(1, 8, 8, return_state=True)
_test_LSTM(1, 4, 4, return_state=False)
_test_LSTM(20, 16, 256, return_state=False)
_test_LSTM_MultiLayer(4, 4)

def _test_RNN(inputs, units):
238 changes: 238 additions & 0 deletions tutorials/nnvm/nlp/keras_s2s_translate.py
@@ -0,0 +1,238 @@
"""
Keras LSTM Sequence to Sequence Model for Translation
======================================================
**Author**: `Siju Samuel <https://siju-samuel.github.io/>`_

This script demonstrates how to implement a basic character-level sequence-to-sequence model.
We apply it to translating short English sentences into short French sentences,
character-by-character.

# Summary of the algorithm

- We start with input sequences from a domain (e.g. English sentences)
and corresponding target sequences from another domain
(e.g. French sentences).
- An encoder LSTM turns input sequences to 2 state vectors
(we keep the last LSTM state and discard the outputs).
- A decoder LSTM is trained to turn the target sequences into
the same sequence but offset by one timestep in the future,
a training process called "teacher forcing" in this context.
It uses the state vectors from the encoder as its initial state.
Effectively, the decoder learns to generate `targets[t+1...]`
given `targets[...t]`, conditioned on the input sequence.

This script loads the s2s.h5 model produced by the training script
https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/lstm_seq2seq.py
and generates sequences from it. It assumes that no changes have been made (for example:
latent_dim, the input data, and the model architecture are unchanged).

# References

- Sequence to Sequence Learning with Neural Networks
https://arxiv.org/abs/1409.3215
- Learning Phrase Representations using
RNN Encoder-Decoder for Statistical Machine Translation
https://arxiv.org/abs/1406.1078

See lstm_seq2seq.py for more details on the model architecture and how it is trained.
"""

from keras.models import Model, load_model
from keras.layers import Input
import random
import os
import numpy as np
import keras
import tvm
import nnvm

######################################################################
# Download required files
# -----------------------
# Download files listed below from dmlc web-data repo.
model_file = "s2s_translate.h5"
data_file = "fra-eng.txt"

# Base location for model related files.
repo_base = 'https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/'
model_url = os.path.join(repo_base, model_file)
data_url = os.path.join(repo_base, data_file)
Contributor:
The model and data files are not found in the repository yet. Can you share them?

# Download files listed below.
from mxnet.gluon.utils import download
download(model_url, model_file)
download(data_url, data_file)

latent_dim = 256 # Latent dimensionality of the encoding space.
test_samples = 10000 # Number of samples used for testing.

######################################################################
# Process the data file
# ---------------------
# Vectorize the data. We use the same approach as the training script.
# NOTE: the data must be identical, in order for the character -> integer
# mappings to be consistent.
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
with open(data_file, 'r', encoding='utf-8') as f:
lines = f.read().split('\n')
test_samples = min(test_samples, len(lines))
max_encoder_seq_length = 0
max_decoder_seq_length = 0
for line in lines[:test_samples]:
input_text, target_text = line.split('\t')
# We use "tab" as the "start sequence" character
# for the targets, and "\n" as "end sequence" character.
target_text = '\t' + target_text + '\n'
max_encoder_seq_length = max(max_encoder_seq_length, len(input_text))
max_decoder_seq_length = max(max_decoder_seq_length, len(target_text))
for char in input_text:
if char not in input_characters:
input_characters.add(char)
for char in target_text:
if char not in target_characters:
target_characters.add(char)

input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
input_token_index = dict(
[(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict(
[(char, i) for i, char in enumerate(target_characters)])

# Reverse-lookup token index to decode sequences back to something readable.
reverse_target_char_index = dict(
(i, char) for char, i in target_token_index.items())

######################################################################
# Load Keras Model
# ----------------
# Restore the model and construct the encoder and decoder.
model = load_model(model_file)
encoder_inputs = model.input[0] # input_1

encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)

decoder_inputs = model.input[1] # input_2
decoder_state_input_h = Input(shape=(latent_dim,), name='input_3')
decoder_state_input_c = Input(shape=(latent_dim,), name='input_4')
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[3]
decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h_dec, state_c_dec]
decoder_dense = model.layers[4]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs] + decoder_states)

######################################################################
# Compile both encoder and decoder model on NNVM
# ----------------------------------------------
# Create the NNVM graph definition from the Keras model.
from tvm.contrib import graph_runtime
target = 'llvm'
ctx = tvm.cpu(0)

# Parse Encoder model
sym, params = nnvm.frontend.from_keras(encoder_model)
inp_enc_shape = (1, max_encoder_seq_length, num_encoder_tokens)
shape_dict = {'input_1': inp_enc_shape}

# Build Encoder model
with nnvm.compiler.build_config(opt_level=2):
enc_graph, enc_lib, enc_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
print("Encoder build ok.")

# Create graph runtime for encoder model
tvm_enc = graph_runtime.create(enc_graph, enc_lib, ctx)
tvm_enc.set_input(**enc_params)

# Parse Decoder model
inp_dec_shape = (1, 1, num_decoder_tokens)
shape_dict = {'input_2': inp_dec_shape,
'input_3': (1, latent_dim),
'input_4': (1, latent_dim)}

# Build Decoder model
sym, params = nnvm.frontend.from_keras(decoder_model)
with nnvm.compiler.build_config(opt_level=2):
dec_graph, dec_lib, dec_params = nnvm.compiler.build(sym, target, shape_dict, params=params)
print("Decoder build ok.")

# Create graph runtime for decoder model
tvm_dec = graph_runtime.create(dec_graph, dec_lib, ctx)
tvm_dec.set_input(**dec_params)

# Decodes an input sequence.
def decode_sequence(input_seq):
# Set the input for encoder model.
tvm_enc.set_input('input_1', input_seq)

# Run encoder model
tvm_enc.run()

# Get states from encoder network
h = tvm_enc.get_output(0).asnumpy()
c = tvm_enc.get_output(1).asnumpy()

# Populate the first character of target sequence with the start character.
sampled_token_index = target_token_index['\t']

# Sampling loop: generate the output sequence one character at a time.
decoded_sentence = ''
while True:
# Generate empty target sequence of length 1.
target_seq = np.zeros((1, 1, num_decoder_tokens), dtype='float32')
# Update the target sequence (of length 1).
target_seq[0, 0, sampled_token_index] = 1.

# Set the input and states for decoder model.
tvm_dec.set_input('input_2', target_seq)
tvm_dec.set_input('input_3', h)
tvm_dec.set_input('input_4', c)
# Run decoder model
tvm_dec.run()

output_tokens = tvm_dec.get_output(0).asnumpy()
h = tvm_dec.get_output(1).asnumpy()
c = tvm_dec.get_output(2).asnumpy()

# Sample a token
sampled_token_index = np.argmax(output_tokens[0, -1, :])
sampled_char = reverse_target_char_index[sampled_token_index]

# Exit condition: either hit max length or find stop character.
if sampled_char == '\n':
break

# Update the sentence
decoded_sentence += sampled_char
if len(decoded_sentence) > max_decoder_seq_length:
break
return decoded_sentence

def generate_input_seq(input_text):
input_seq = np.zeros((1, max_encoder_seq_length, num_encoder_tokens), dtype='float32')
for t, char in enumerate(input_text):
input_seq[0, t, input_token_index[char]] = 1.
return input_seq

######################################################################
# Run the model
# -------------
# Randomly pick some sentences from the test samples and translate them.
for seq_index in range(100):
# Take one sentence randomly and try to decode.
index = random.randint(0, test_samples - 1)
input_text, _ = lines[index].split('\t')
input_seq = generate_input_seq(input_text)
decoded_sentence = decode_sequence(input_seq)
print((seq_index + 1), ": ", input_text, "==>", decoded_sentence)