Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(self, config, session):
self.latest_layer = None
self.output_weights = None
self.output_bias = None
self.l2_term = tf.constant(0, dtype=tf.float64)
self.l2_term = tf.constant(0, dtype=tf.float32)

self.vocabulary_size = config[VOC_SIZE]
self.user_count = config[USER_COUNT]
Expand Down
32 changes: 16 additions & 16 deletions model/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,19 @@ def add_input_layer(self):
[None, self._model.max_title_length],
name="input")
self._model.subreddit_input = \
tf.placeholder(tf.float64,
tf.placeholder(tf.float32,
[None, self._model.subreddit_count],
name="subreddit_input")
self._model.target = \
tf.placeholder(tf.float64,
tf.placeholder(tf.float32,
[None, self._model.user_count],
name="target")
self._model.sec_target = \
tf.placeholder(tf.float64,
tf.placeholder(tf.float32,
[None, self._model.data.subreddit_count],
name="sec_target")

self._model.keep_prob = tf.placeholder(tf.float64, name="keep_prob")
self._model.keep_prob = tf.placeholder(tf.float32, name="keep_prob")

if self._model.rnn_unit == 'lstm':
rnn_layer = tf.contrib.rnn.LSTMCell(self._model.rnn_neurons)
Expand All @@ -73,13 +73,13 @@ def add_input_layer(self):
tf.random_uniform(
[self._model.vocabulary_size,
self._model.embedding_size],
-1.0, 1.0, dtype=tf.float64),
-1.0, 1.0, dtype=tf.float32),
trainable=self._model.is_trainable_matrix,
name="embedding_matrix",
dtype=tf.float64)
dtype=tf.float32)

self._model.embedding_placeholder = \
tf.placeholder(tf.float64,
tf.placeholder(tf.float32,
[self._model.vocabulary_size, self._model.embedding_size])
self._model.embedding_init = \
embedding_matrix.assign(self._model.embedding_placeholder)
Expand All @@ -88,7 +88,7 @@ def add_input_layer(self):
self._model.input)
# Run the LSTM layer with the embedded input
outputs, _ = tf.nn.dynamic_rnn(rnn_layer, embedded_input,
dtype=tf.float64)
dtype=tf.float32)

outputs = tf.transpose(outputs, [1, 0, 2])
output = outputs[-1]
Expand All @@ -98,13 +98,13 @@ def add_input_layer(self):
[self._model.subreddit_count,
self._model.subreddit_input_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="sub_input_weights")

subreddit_bias = tf.Variable(tf.random_normal(
[self._model.subreddit_input_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="sub_input_bias")

logit_subreddit = tf.add(
Expand All @@ -128,22 +128,22 @@ def add_layer(self, number_of_neurons):
else 0),
number_of_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="weights" + str(self.number_of_layers))
bias = tf.Variable(tf.random_normal([number_of_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="biases" + str(self.number_of_layers))

else:
weights = tf.Variable(tf.random_normal(
[self._model.latest_layer.get_shape()[1].value, number_of_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="weights" + str(self.number_of_layers))
bias = tf.Variable(tf.random_normal([number_of_neurons],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="biases" + str(self.number_of_layers))

logits = tf.add(tf.matmul(self._model.latest_layer, weights), bias)
Expand All @@ -169,12 +169,12 @@ def add_output_layer(self, output_size, secondary_output=False):
sigmoid_weights = tf.Variable(tf.random_normal(
[self._model.latest_layer.get_shape()[1].value, output_size],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="output_weights")

sigmoid_bias = tf.Variable(tf.random_normal([output_size],
stddev=0.35,
dtype=tf.float64),
dtype=tf.float32),
name="output_biases")

logits = tf.add(tf.matmul(self._model.latest_layer, sigmoid_weights), sigmoid_bias)
Expand Down
2 changes: 1 addition & 1 deletion model/util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@
for conf in cfg['network']:
for (key, value) in conf.items():
if type(value) is float:
conf[key] = np.float64(value)
conf[key] = np.float32(value)


30 changes: 14 additions & 16 deletions model/util/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,33 +74,31 @@ def get_data(self, datatype, data_column=[0], sub_column=1, label_column=2):
subreddit_full.append(subreddit)
return data_full, subreddit_full, label_full

def load_pretrained_embeddings(self, file_name, dimension_size=50):
    """Load pre-trained word embeddings from a space-delimited file.

    Builds a vocabulary dict mapping each kept word to its row index in
    the embedding matrix. Row 0 is reserved for 'UNK' (unknown words)
    and initialised with a random float32 vector.

    Args:
        file_name: name of the embeddings file inside DATASETS_PATH.
        dimension_size: expected number of components per embedding
            vector (default 50, e.g. GloVe 50d).

    Returns:
        Tuple (word_dict, matrix) where word_dict maps word -> row index
        and matrix is a float32 numpy array of shape
        (vocabulary_size, dimension_size).
    """
    file_path = os.path.join(DATASETS_PATH, file_name)
    # Row 0 is the randomly-initialised 'UNK' embedding.
    matrix = [np.random.rand(dimension_size).astype(np.float32)]
    word_dict = {'UNK': 0}

    # Bare punctuation tokens we do not want in the vocabulary.
    punctuation = {'!', '?', '-', '_', '.', ',', '\'', '\"',
                   ':', ';', '%', '(', ')'}

    with open(file_path, 'r', encoding='UTF-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ', quoting=csv.QUOTE_NONE)
        for row in reader:
            # Guard against blank lines: row[0] on an empty row raises.
            if not row:
                continue

            first_col = row[0]

            if first_col in punctuation:
                continue

            # Some words are tokens for usernames like <user> or
            # <caps> etc, ignore them.
            if first_col.startswith('<'):
                continue

            # Skip malformed lines: a ragged row would make the final
            # np.array() call produce a dtype=object array (or raise on
            # recent NumPy) instead of a 2-D float32 matrix.
            if len(row) != dimension_size + 1:
                continue

            word_dict[first_col] = len(matrix)
            matrix.append(np.array(row[1:], dtype=np.float32))

    return word_dict, np.array(matrix)



2 changes: 1 addition & 1 deletion model/util/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _build_dict(self):
helper.build_dataset(vocab, vocabulary_size=self.vocabulary_size)
else:
self.word_dict, self.embedding_matrix = \
self.reader.test_load_pretrained_embeddings(
self.reader.load_pretrained_embeddings(
self.pre_trained_matrix,
self.embedding_size)
users = " ".join(self.train_labels).split()
Expand Down