This repository was archived by the owner on Oct 5, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmodel.py
More file actions
245 lines (206 loc) · 9.7 KB
/
model.py
File metadata and controls
245 lines (206 loc) · 9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Copyright 2015 Conchylicultor. All Rights Reserved.
# Modifications copyright (C) 2016 Carlos Segura
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Model to predict the next sentence given an input sequence
"""
import tensorflow as tf
from chatbot.textdata import Batch
class ProjectionOp:
    """ Single layer perceptron.
    Projects the input tensor onto the output (vocabulary) dimension.

    Holds a transposed weight matrix `W_t` of shape `shape` (num_classes, input_dim)
    so it can be fed directly to `tf.nn.sampled_softmax_loss`, plus its transpose
    `W` used for the forward projection in `__call__`.
    """
    def __init__(self, shape, scope=None, dtype=None):
        """
        Args:
            shape: a tuple (num_classes, input_dim) — note the transposed order,
                matching what `sampled_softmax_loss` expects for its weights.
            scope (str): name used to encapsulate the variables. If None, a
                default name is used (the original code crashed on None because
                it concatenated `'weights_' + None`).
            dtype: the weights type
        """
        assert len(shape) == 2
        # Bug fix: `scope=None` (the declared default) raised a TypeError on the
        # string concatenation below. Fall back to a stable default name instead.
        self.scope = scope if scope is not None else 'projection_op'
        # Projection on the keyboard
        with tf.variable_scope('weights_' + self.scope):
            self.W_t = tf.get_variable(
                'weights',
                shape,
                # initializer=tf.truncated_normal_initializer()  # TODO: Tune value (fct of input size: 1/sqrt(input_dim))
                dtype=dtype
            )
            self.b = tf.get_variable(
                'bias',
                shape[0],  # one bias per output class
                initializer=tf.constant_initializer(),  # zeros by default
                dtype=dtype
            )
        # Cache the transpose once: [input_dim, num_classes], used by __call__.
        self.W = tf.transpose(self.W_t)

    def getWeights(self):
        """ Convenience method for some tf arguments
        Returns:
            (W, b): the forward projection matrix and the bias vector.
        """
        return self.W, self.b

    def __call__(self, X):
        """ Project the output of the decoder into the vocabulary space
        Args:
            X (tf.Tensor): input value, shape [batch, input_dim]
        Returns:
            tf.Tensor: logits of shape [batch, num_classes]
        """
        with tf.name_scope(self.scope):
            return tf.matmul(X, self.W) + self.b
class Model:
    """
    Implementation of a seq2seq model.
    Architecture:
        Encoder/decoder
        2 LTSM layers

    The graph is built eagerly at construction time (TF 1.x style): placeholders,
    the embedding seq2seq network, and either the test-mode outputs or the
    training loss + Adam optimizer, depending on `args.test`.
    """
    def __init__(self, args, textData):
        """
        Args:
            args: parameters of the model (hyper-parameters and mode flags;
                exact attribute set is defined elsewhere — this class reads
                test, softmaxSamples, hiddenSize, dropout, numLayers,
                maxLengthEnco, maxLengthDeco, embeddingSize, learningRate)
            textData: the dataset object (provides getVocabularySize() and goToken)
        """
        print("Model creation...")

        self.textData = textData  # Keep a reference on the dataset
        self.args = args  # Keep track of the parameters of the model
        self.dtype = tf.float32

        # Placeholders (lists of per-timestep tensors, filled by buildNetwork)
        self.encoderInputs = None
        self.decoderInputs = None  # Same that decoderTarget plus the <go>
        self.decoderTargets = None
        self.decoderWeights = None  # Adjust the learning to the target sentence size

        # Main operators
        self.lossFct = None
        self.optOp = None
        self.outputs = None  # Outputs of the network, list of probability for each words

        # Construct the graphs
        self.buildNetwork()

    def buildNetwork(self):
        """ Create the computational graph.

        Builds, in order: an optional sampled-softmax output projection, the
        multi-layer LSTM cell, the input placeholders, the embedding seq2seq
        network, and finally either the test outputs (args.test) or the
        training loss and optimizer.
        """
        # TODO: Create name_scopes (for better graph visualisation)
        # TODO: Use buckets (better perfs)

        # Parameters of sampled softmax (needed for attention mechanism and a large vocabulary size)
        outputProjection = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if 0 < self.args.softmaxSamples < self.textData.getVocabularySize():
            outputProjection = ProjectionOp(
                (self.textData.getVocabularySize(), self.args.hiddenSize),
                scope='softmax_projection',
                dtype=self.dtype
            )

            def sampledSoftmax(labels, inputs):
                # Per-timestep loss used by sequence_loss below; closes over
                # outputProjection defined just above.
                labels = tf.reshape(labels, [-1, 1])  # Add one dimension (nb of true classes, here 1)

                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                localWt = tf.cast(outputProjection.W_t, tf.float32)
                localB = tf.cast(outputProjection.b, tf.float32)
                localInputs = tf.cast(inputs, tf.float32)

                return tf.cast(
                    tf.nn.sampled_softmax_loss(
                        localWt,  # Should have shape [num_classes, dim]
                        localB,
                        labels,
                        localInputs,
                        self.args.softmaxSamples,  # The number of classes to randomly sample per batch
                        self.textData.getVocabularySize()),  # The number of classes
                    self.dtype)

        # Creation of the rnn cell
        def create_rnn_cell():
            # One LSTM layer; dropout is only applied at training time.
            encoDecoCell = tf.contrib.rnn.BasicLSTMCell(  # Or GRUCell, LSTMCell(args.hiddenSize)
                self.args.hiddenSize,
            )
            if not self.args.test:  # TODO: Should use a placeholder instead
                encoDecoCell = tf.contrib.rnn.DropoutWrapper(
                    encoDecoCell,
                    input_keep_prob=1.0,
                    output_keep_prob=self.args.dropout
                )
            return encoDecoCell

        encoDecoCell = tf.contrib.rnn.MultiRNNCell(
            [create_rnn_cell() for _ in range(self.args.numLayers)],
        )

        # Network input (placeholders)
        with tf.name_scope('placeholder_encoder'):
            self.encoderInputs = [tf.placeholder(tf.int32, [None, ]) for _ in range(self.args.maxLengthEnco)]  # Batch size * sequence length * input dim

        with tf.name_scope('placeholder_decoder'):
            self.decoderInputs = [tf.placeholder(tf.int32, [None, ], name='inputs') for _ in range(self.args.maxLengthDeco)]  # Same sentence length for input and output (Right ?)
            self.decoderTargets = [tf.placeholder(tf.int32, [None, ], name='targets') for _ in range(self.args.maxLengthDeco)]
            self.decoderWeights = [tf.placeholder(tf.float32, [None, ], name='weights') for _ in range(self.args.maxLengthDeco)]

        # Define the network
        # Here we use an embedding model, it takes integer as input and convert them into word vector for
        # better word representation
        decoderOutputs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(
            self.encoderInputs,  # List<[batch=?, inputDim=1]>, list of size args.maxLength
            self.decoderInputs,  # For training, we force the correct output (feed_previous=False)
            encoDecoCell,
            self.textData.getVocabularySize(),
            self.textData.getVocabularySize(),  # Both encoder and decoder have the same number of class
            embedding_size=self.args.embeddingSize,  # Dimension of each word
            output_projection=outputProjection.getWeights() if outputProjection else None,
            feed_previous=bool(self.args.test)  # When we test (self.args.test), we use previous output as next input (feed_previous)
        )
        # TODO: When the LSTM hidden size is too big, we should project the LSTM output into a smaller space (4086 => 2046): Should speed up
        # training and reduce memory usage. Other solution, use sampling softmax

        # For testing only
        if self.args.test:
            if not outputProjection:
                self.outputs = decoderOutputs
            else:
                # With a sampled softmax the raw decoder outputs are in the
                # hidden space; project each timestep back to vocabulary logits.
                self.outputs = [outputProjection(output) for output in decoderOutputs]

            # TODO: Attach a summary to visualize the output

        # For training only
        else:
            # Finally, we define the loss function
            self.lossFct = tf.contrib.legacy_seq2seq.sequence_loss(
                decoderOutputs,
                self.decoderTargets,
                self.decoderWeights,
                self.textData.getVocabularySize(),
                softmax_loss_function= sampledSoftmax if outputProjection else None  # If None, use default SoftMax
            )
            tf.summary.scalar('loss', self.lossFct)  # Keep track of the cost

            # Initialize the optimizer
            opt = tf.train.AdamOptimizer(
                learning_rate=self.args.learningRate,
                beta1=0.9,
                beta2=0.999,
                epsilon=1e-08
            )
            self.optOp = opt.minimize(self.lossFct)

    def step(self, batch):
        """ Forward/training step operation.
        Does not perform run on itself but just return the operators to do so. Those have then to be run
        Args:
            batch (Batch): Input data on testing mode, input and target on output mode
        Return:
            (ops), dict: A tuple of the (training, loss) operators or (outputs,) in testing mode with the associated feed dictionary
        """
        # Feed the dictionary
        feedDict = {}
        ops = None

        if not self.args.test:  # Training
            for i in range(self.args.maxLengthEnco):
                feedDict[self.encoderInputs[i]] = batch.encoderSeqs[i]
            for i in range(self.args.maxLengthDeco):
                feedDict[self.decoderInputs[i]] = batch.decoderSeqs[i]
                feedDict[self.decoderTargets[i]] = batch.targetSeqs[i]
                feedDict[self.decoderWeights[i]] = batch.weights[i]

            ops = (self.optOp, self.lossFct)
        else:  # Testing (batchSize == 1)
            for i in range(self.args.maxLengthEnco):
                feedDict[self.encoderInputs[i]] = batch.encoderSeqs[i]
            # Only the <go> token is fed to the decoder; feed_previous=True in
            # the graph chains each predicted word into the next input.
            feedDict[self.decoderInputs[0]] = [self.textData.goToken]

            ops = (self.outputs,)

        # Return one pass operator
        return ops, feedDict