|
28 | 28 | # Network Parameters |
29 | 29 | num_units = 32 # number of neurons for the LSTM layer. |
30 | 30 |
|
31 | | - run = Run() |
32 | | - run.init(metadata={'dataset.num_classes': num_classes, |
33 | | - 'dataset.seq_max_len': seq_max_len, |
34 | | - 'dataset.seq_min_len': seq_min_len, |
35 | | - 'dataset.masking_val': masking_val, |
36 | | - 'training.learning_rate': learning_rate, |
37 | | - 'training.training_steps': training_steps, |
38 | | - 'training.batch_size': batch_size, |
39 | | - 'network.num_units': num_units}, |
40 | | - description="TensorFlow 2.0 implementation of a Recurrent Neural Network (LSTM) that performs dynamic " |
41 | | - "computation over sequences with variable length. This example is using a toy dataset to " |
42 | | - "classify linear sequences. The generated sequences have variable length.") |
43 | | - run.save('dynamic_rnn.py', 'code') |
44 | | - |
45 | | - # ==================== |
46 | | - # TOY DATA GENERATOR |
47 | | - # ==================== |
48 | | - |
49 | | - def toy_sequence_data(): |
50 | | - """ Generate sequence of data with dynamic length. |
51 | | - This function generates toy samples for training: |
52 | | - - Class 0: linear sequences (e.g. [1, 2, 3, 4, ...]) |
53 | | - - Class 1: random sequences (e.g. [9, 3, 10, 7, ...]) |
54 | | - |
55 | | - NOTICE: |
56 | | - We have to pad each sequence to reach 'seq_max_len' for TensorFlow |
57 | | - consistency (we cannot feed a numpy array with inconsistent |
58 | | - dimensions). The dynamic calculation will then be performed and ignore |
59 | | - the masked value (here -1). |
60 | | - """ |
61 | | - while True: |
62 | | - # Set variable sequence length. |
63 | | - seq_len = random.randint(seq_min_len, seq_max_len) |
64 | | - rand_start = random.randint(0, max_value - seq_len) |
65 | | - # Add a random or linear int sequence (50% prob). |
66 | | - if random.random() < .5: |
67 | | - # Generate a linear sequence. |
68 | | - seq = np.arange(start=rand_start, stop=rand_start+seq_len) |
69 | | - # Rescale values to [0., 1.]. |
70 | | - seq = seq / max_value |
71 | | - # Pad sequence until the maximum length for dimension consistency. |
72 | | - # Masking value: -1. |
73 | | - seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val) |
74 | | - label = 0 |
75 | | - else: |
76 | | - # Generate a random sequence. |
77 | | - seq = np.random.randint(max_value, size=seq_len) |
78 | | - # Rescale values to [0., 1.]. |
79 | | - seq = seq / max_value |
80 | | - # Pad sequence until the maximum length for dimension consistency. |
81 | | - # Masking value: -1. |
82 | | - seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val) |
83 | | - label = 1 |
84 | | - yield np.array(seq, dtype=np.float32), np.array(label, dtype=np.float32) |
85 | | - |
86 | | - # Use tf.data API to shuffle and batch data. |
87 | | - train_data = tf.data.Dataset.from_generator(toy_sequence_data, output_types=(tf.float32, tf.float32)) |
88 | | - train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1) |
89 | | - |
90 | | - # Create LSTM Model. |
91 | | - class LSTM(Model): |
92 | | - # Set layers. |
93 | | - def __init__(self): |
94 | | - super(LSTM, self).__init__() |
95 | | - # Define a Masking Layer with -1 as mask. |
96 | | - self.masking = layers.Masking(mask_value=masking_val) |
97 | | - # Define an LSTM layer to be applied over the Masking layer. |
98 | | - # Dynamic computation will automatically be performed to ignore -1 values. |
99 | | - self.lstm = layers.LSTM(units=num_units) |
100 | | - # Output fully connected layer (2 classes: linear or random seq). |
101 | | - self.out = layers.Dense(num_classes) |
102 | | - |
103 | | - # Set forward pass. |
104 | | - def call(self, x, is_training=False): |
105 | | - # An RNN Layer expects a 3-dim input (batch_size, seq_len, num_features). |
106 | | - x = tf.reshape(x, shape=[-1, seq_max_len, 1]) |
107 | | - # Apply Masking layer. |
108 | | - x = self.masking(x) |
109 | | - # Apply LSTM layer. |
110 | | - x = self.lstm(x) |
111 | | - # Apply output layer. |
112 | | - x = self.out(x) |
113 | | - if not is_training: |
114 | | - # tf cross entropy expects logits without softmax, so only |
115 | | - # apply softmax when not training. |
116 | | - x = tf.nn.softmax(x) |
117 | | - return x |
118 | | - |
119 | | - # Build LSTM model. |
120 | | - lstm_net = LSTM() |
121 | | - |
122 | | - # Cross-Entropy Loss. |
123 | | - # Note that this will apply 'softmax' to the logits. |
124 | | - def cross_entropy_loss(x, y): |
125 | | - # Convert labels to int 64 for tf cross-entropy function. |
126 | | - y = tf.cast(y, tf.int64) |
127 | | - # Apply softmax to logits and compute cross-entropy. |
128 | | - loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x) |
129 | | - # Average loss across the batch. |
130 | | - return tf.reduce_mean(loss) |
131 | | - |
132 | | - # Accuracy metric. |
133 | | - def accuracy(y_pred, y_true): |
134 | | - # Predicted class is the index of highest score in prediction vector (i.e. argmax). |
135 | | - correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64)) |
136 | | - return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1) |
137 | | - |
138 | | - # Adam optimizer. |
139 | | - optimizer = tf.optimizers.Adam(learning_rate) |
140 | | - |
141 | | - # Optimization process. |
142 | | - def run_optimization(x, y): |
143 | | - # Wrap computation inside a GradientTape for automatic differentiation. |
144 | | - with tf.GradientTape() as g: |
145 | | - # Forward pass. |
146 | | - pred = lstm_net(x, is_training=True) |
147 | | - # Compute loss. |
148 | | - loss = cross_entropy_loss(pred, y) |
149 | | - |
150 | | - # Variables to update, i.e. trainable variables. |
151 | | - trainable_variables = lstm_net.trainable_variables |
152 | | - |
153 | | - # Compute gradients. |
154 | | - gradients = g.gradient(loss, trainable_variables) |
155 | | - |
156 | | - # Update weights following gradients. |
157 | | - optimizer.apply_gradients(zip(gradients, trainable_variables)) |
158 | | - |
159 | | - # Run training for the given number of steps. |
160 | | - for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1): |
161 | | - # Run the optimization to update W and b values. |
162 | | - run_optimization(batch_x, batch_y) |
163 | | - |
164 | | - pred = lstm_net(batch_x, is_training=True) |
165 | | - loss = cross_entropy_loss(pred, batch_y) |
166 | | - acc = accuracy(pred, batch_y) |
167 | | - run.log_metrics({'loss': float(loss), 'accuracy': float(acc)}) |
168 | | - |
169 | | - run.update_metadata({'loss': float(loss), 'accuracy': float(acc)}) |
170 | | - run.close() |
| 31 | + with Run() as run: |
| 32 | + run.init(metadata={'dataset.num_classes': num_classes, |
| 33 | + 'dataset.seq_max_len': seq_max_len, |
| 34 | + 'dataset.seq_min_len': seq_min_len, |
| 35 | + 'dataset.masking_val': masking_val, |
| 36 | + 'training.learning_rate': learning_rate, |
| 37 | + 'training.training_steps': training_steps, |
| 38 | + 'training.batch_size': batch_size, |
| 39 | + 'network.num_units': num_units}, |
| 40 | + description="TensorFlow 2.0 implementation of a Recurrent Neural Network (LSTM) that performs dynamic " |
| 41 | + "computation over sequences with variable length. This example is using a toy dataset to " |
| 42 | + "classify linear sequences. The generated sequences have variable length.") |
| 43 | + run.save('dynamic_rnn.py', 'code') |
| 44 | + |
| 45 | + # ==================== |
| 46 | + # TOY DATA GENERATOR |
| 47 | + # ==================== |
| 48 | + |
| 49 | + def toy_sequence_data(): |
| 50 | + """ Generate sequence of data with dynamic length. |
| 51 | + This function generates toy samples for training: |
| 52 | + - Class 0: linear sequences (e.g. [1, 2, 3, 4, ...]) |
| 53 | + - Class 1: random sequences (e.g. [9, 3, 10, 7, ...]) |
| 54 | + |
| 55 | + NOTICE: |
| 56 | + We have to pad each sequence to reach 'seq_max_len' for TensorFlow |
| 57 | + consistency (we cannot feed a numpy array with inconsistent |
| 58 | + dimensions). The dynamic calculation will then be performed and ignore |
| 59 | + the masked value (here -1). |
| 60 | + """ |
| 61 | + while True: |
| 62 | + # Set variable sequence length. |
| 63 | + seq_len = random.randint(seq_min_len, seq_max_len) |
| 64 | + rand_start = random.randint(0, max_value - seq_len) |
| 65 | + # Add a random or linear int sequence (50% prob). |
| 66 | + if random.random() < .5: |
| 67 | + # Generate a linear sequence. |
| 68 | + seq = np.arange(start=rand_start, stop=rand_start+seq_len) |
| 69 | + # Rescale values to [0., 1.]. |
| 70 | + seq = seq / max_value |
| 71 | + # Pad sequence until the maximum length for dimension consistency. |
| 72 | + # Masking value: -1. |
| 73 | + seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val) |
| 74 | + label = 0 |
| 75 | + else: |
| 76 | + # Generate a random sequence. |
| 77 | + seq = np.random.randint(max_value, size=seq_len) |
| 78 | + # Rescale values to [0., 1.]. |
| 79 | + seq = seq / max_value |
| 80 | + # Pad sequence until the maximum length for dimension consistency. |
| 81 | + # Masking value: -1. |
| 82 | + seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val) |
| 83 | + label = 1 |
| 84 | + yield np.array(seq, dtype=np.float32), np.array(label, dtype=np.float32) |
| 85 | + |
| 86 | + # Use tf.data API to shuffle and batch data. |
| 87 | + train_data = tf.data.Dataset.from_generator(toy_sequence_data, output_types=(tf.float32, tf.float32)) |
| 88 | + train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1) |
| 89 | + |
| 90 | + # Create LSTM Model. |
| 91 | + class LSTM(Model): |
| 92 | + # Set layers. |
| 93 | + def __init__(self): |
| 94 | + super(LSTM, self).__init__() |
| 95 | + # Define a Masking Layer with -1 as mask. |
| 96 | + self.masking = layers.Masking(mask_value=masking_val) |
| 97 | + # Define an LSTM layer to be applied over the Masking layer. |
| 98 | + # Dynamic computation will automatically be performed to ignore -1 values. |
| 99 | + self.lstm = layers.LSTM(units=num_units) |
| 100 | + # Output fully connected layer (2 classes: linear or random seq). |
| 101 | + self.out = layers.Dense(num_classes) |
| 102 | + |
| 103 | + # Set forward pass. |
| 104 | + def call(self, x, is_training=False): |
| 105 | + # An RNN Layer expects a 3-dim input (batch_size, seq_len, num_features). |
| 106 | + x = tf.reshape(x, shape=[-1, seq_max_len, 1]) |
| 107 | + # Apply Masking layer. |
| 108 | + x = self.masking(x) |
| 109 | + # Apply LSTM layer. |
| 110 | + x = self.lstm(x) |
| 111 | + # Apply output layer. |
| 112 | + x = self.out(x) |
| 113 | + if not is_training: |
| 114 | + # tf cross entropy expects logits without softmax, so only |
| 115 | + # apply softmax when not training. |
| 116 | + x = tf.nn.softmax(x) |
| 117 | + return x |
| 118 | + |
| 119 | + # Build LSTM model. |
| 120 | + lstm_net = LSTM() |
| 121 | + |
| 122 | + # Cross-Entropy Loss. |
| 123 | + # Note that this will apply 'softmax' to the logits. |
| 124 | + def cross_entropy_loss(x, y): |
| 125 | + # Convert labels to int 64 for tf cross-entropy function. |
| 126 | + y = tf.cast(y, tf.int64) |
| 127 | + # Apply softmax to logits and compute cross-entropy. |
| 128 | + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x) |
| 129 | + # Average loss across the batch. |
| 130 | + return tf.reduce_mean(loss) |
| 131 | + |
| 132 | + # Accuracy metric. |
| 133 | + def accuracy(y_pred, y_true): |
| 134 | + # Predicted class is the index of highest score in prediction vector (i.e. argmax). |
| 135 | + correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64)) |
| 136 | + return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1) |
| 137 | + |
| 138 | + # Adam optimizer. |
| 139 | + optimizer = tf.optimizers.Adam(learning_rate) |
| 140 | + |
| 141 | + # Optimization process. |
| 142 | + def run_optimization(x, y): |
| 143 | + # Wrap computation inside a GradientTape for automatic differentiation. |
| 144 | + with tf.GradientTape() as g: |
| 145 | + # Forward pass. |
| 146 | + pred = lstm_net(x, is_training=True) |
| 147 | + # Compute loss. |
| 148 | + loss = cross_entropy_loss(pred, y) |
| 149 | + |
| 150 | + # Variables to update, i.e. trainable variables. |
| 151 | + trainable_variables = lstm_net.trainable_variables |
| 152 | + |
| 153 | + # Compute gradients. |
| 154 | + gradients = g.gradient(loss, trainable_variables) |
| 155 | + |
| 156 | + # Update weights following gradients. |
| 157 | + optimizer.apply_gradients(zip(gradients, trainable_variables)) |
| 158 | + |
| 159 | + # Run training for the given number of steps. |
| 160 | + for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1): |
| 161 | + # Run the optimization to update W and b values. |
| 162 | + run_optimization(batch_x, batch_y) |
| 163 | + |
| 164 | + pred = lstm_net(batch_x, is_training=True) |
| 165 | + loss = cross_entropy_loss(pred, batch_y) |
| 166 | + acc = accuracy(pred, batch_y) |
| 167 | + run.log_metrics({'loss': float(loss), 'accuracy': float(acc)}) |
| 168 | + |
| 169 | + run.update_metadata({'loss': float(loss), 'accuracy': float(acc)}) |
| 170 | + run.close() |
0 commit comments