diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..53fcebb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,159 @@
+# Ignore the data dir to save GitHub space
+data/
+
+# python gitignore -------------------------------------
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# osx gitignore -------------------------------------
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# linux gitignore -------------------------------------
+*~
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# Windows gitignore -------------------------------------
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# Sublime gitignore -------------------------------------
+# cache files for sublime text
+*.tmlanguage.cache
+*.tmPreferences.cache
+*.stTheme.cache
+
+# workspace files are user-specific
+*.sublime-workspace
+
+# project files should be checked into the repository, unless a significant
+# proportion of contributors will probably not be using SublimeText
+# *.sublime-project
+
+# sftp configuration file
+sftp-config.json
+
+# vim gitignore -------------------------------------
+[._]*.s[a-w][a-z]
+[._]s[a-w][a-z]
+*.un~
+Session.vim
+.netrwhist
+*~
+
+# Virtualenv gitignore -------------------------------------
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+.Python
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ss]cripts
+pyvenv.cfg
+pip-selfcheck.json
+
+# JetBrains gitignore -------------------------------------
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio
+
+*.iml
+
+## Directory-based project format:
+.idea/
+# if you remove the above rule, at least ignore the following:
+
+# User-specific stuff:
+.idea/workspace.xml
+.idea/tasks.xml
+.idea/dictionaries
+
+# Sensitive or high-churn files:
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+
+## File-based project format:
+*.ipr
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000..a708ec7
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/sbt.xml b/.idea/sbt.xml
new file mode 100644
index 0000000..2018743
--- /dev/null
+++ b/.idea/sbt.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Bitcoin LSTM Prediction.ipynb b/Bitcoin LSTM Prediction.ipynb
index 36733ae..1095cb1 100644
--- a/Bitcoin LSTM Prediction.ipynb
+++ b/Bitcoin LSTM Prediction.ipynb
@@ -254,7 +254,6 @@
{
"ename": "TypeError",
"evalue": "Cannot interpret feed_dict key as Tensor: Tensor Tensor(\"lstm_1_input:0\", shape=(?, ?, 4), dtype=float32) is not an element of this graph.",
- "output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
@@ -274,7 +273,8 @@
"\u001b[0;32mC:\\ProgramData\\Anaconda2\\envs\\python3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 777\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 778\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[0;32mC:\\ProgramData\\Anaconda2\\envs\\python3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 931\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m raise TypeError('Cannot interpret feed_dict key as Tensor: '\n\u001b[0;32m--> 933\u001b[0;31m + e.args[0])\n\u001b[0m\u001b[1;32m 934\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 935\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msubfeed_val\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mops\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: Cannot interpret feed_dict key as Tensor: Tensor Tensor(\"lstm_1_input:0\", shape=(?, ?, 4), dtype=float32) is not an element of this graph."
- ]
+ ],
+ "output_type": "error"
}
],
"source": [
diff --git a/etl.py b/etl.py
index 2b4ceb1..2cdcc04 100644
--- a/etl.py
+++ b/etl.py
@@ -2,110 +2,131 @@
import numpy as np
import pandas as pd
+
class ETL:
- """Extract Transform Load class for all data operations pre model inputs. Data is read in generative way to allow for large datafiles and low memory utilisation"""
-
- def generate_clean_data(self, filename, batch_size=1000, start_index=0):
- with h5py.File(filename, 'r') as hf:
- i = start_index
- while True:
- data_x = hf['x'][i:i+batch_size]
- data_y = hf['y'][i:i+batch_size]
- i += batch_size
- yield (data_x, data_y)
-
- def create_clean_datafile(self, filename_in, filename_out, batch_size=1000, x_window_size=100, y_window_size=1, y_col=0, filter_cols=None, normalise=True):
- """Incrementally save a datafile of clean data ready for loading straight into model"""
- print('> Creating x & y data files...')
-
- data_gen = self.clean_data(
- filename_in,
- batch_size = batch_size,
- x_window_size = x_window_size,
- y_window_size = y_window_size,
- y_col = y_col,
- filter_cols = filter_cols,
- normalise = True
- )
-
- i = 0
- with h5py.File(filename_out, 'w') as hf:
- x1, y1 = next(data_gen)
- #Initialise hdf5 x, y datasets with first chunk of data
- rcount_x = x1.shape[0]
- dset_x = hf.create_dataset('x', shape=x1.shape, maxshape=(None, x1.shape[1], x1.shape[2]), chunks=True)
- dset_x[:] = x1
- rcount_y = y1.shape[0]
- dset_y = hf.create_dataset('y', shape=y1.shape, maxshape=(None,), chunks=True)
- dset_y[:] = y1
-
- for x_batch, y_batch in data_gen:
- #Append batches to x, y hdf5 datasets
- print('> Creating x & y data files | Batch:', i, end='\r')
- dset_x.resize(rcount_x + x_batch.shape[0], axis=0)
- dset_x[rcount_x:] = x_batch
- rcount_x += x_batch.shape[0]
- dset_y.resize(rcount_y + y_batch.shape[0], axis=0)
- dset_y[rcount_y:] = y_batch
- rcount_y += y_batch.shape[0]
- i += 1
-
- print('> Clean datasets created in file `' + filename_out + '.h5`')
-
- def clean_data(self, filepath, batch_size, x_window_size, y_window_size, y_col, filter_cols, normalise):
- """Cleans and Normalises the data in batches `batch_size` at a time"""
- data = pd.read_csv(filepath, index_col=0)
-
- if(filter_cols):
- #Remove any columns from data that we don't need by getting the difference between cols and filter list
- rm_cols = set(data.columns) - set(filter_cols)
- for col in rm_cols:
- del data[col]
-
- #Convert y-predict column name to numerical index
- y_col = list(data.columns).index(y_col)
-
- num_rows = len(data)
- x_data = []
- y_data = []
- i = 0
- while((i+x_window_size+y_window_size) <= num_rows):
- x_window_data = data[i:(i+x_window_size)]
- y_window_data = data[(i+x_window_size):(i+x_window_size+y_window_size)]
-
- #Remove any windows that contain NaN
- if(x_window_data.isnull().values.any() or y_window_data.isnull().values.any()):
- i += 1
- continue
-
- if(normalise):
- abs_base, x_window_data = self.zero_base_standardise(x_window_data)
- _, y_window_data = self.zero_base_standardise(y_window_data, abs_base=abs_base)
-
- #Average of the desired predicter y column
- y_average = np.average(y_window_data.values[:, y_col])
- x_data.append(x_window_data.values)
- y_data.append(y_average)
- i += 1
-
- #Restrict yielding until we have enough in our batch. Then clear x, y data for next batch
- if(i % batch_size == 0):
- #Convert from list to 3 dimensional numpy array [windows, window_val, val_dimension]
- x_np_arr = np.array(x_data)
- y_np_arr = np.array(y_data)
- x_data = []
- y_data = []
- yield (x_np_arr, y_np_arr)
-
- def zero_base_standardise(self, data, abs_base=pd.DataFrame()):
- """Standardise dataframe to be zero based percentage returns from i=0"""
- if(abs_base.empty): abs_base = data.iloc[0]
- data_standardised = (data/abs_base)-1
- return (abs_base, data_standardised)
-
- def min_max_normalise(self, data, data_min=pd.DataFrame(), data_max=pd.DataFrame()):
- """Normalise a Pandas dataframe using column-wise min-max normalisation (can use custom min, max if desired)"""
- if(data_min.empty): data_min = data.min()
- if(data_max.empty): data_max = data.max()
- data_normalised = (data-data_min)/(data_max-data_min)
- return (data_min, data_max, data_normalised)
\ No newline at end of file
+    """Extract Transform Load class for all data operations prior to model
+    input. Data is read in a generative way to allow for large data files
+    and low memory utilisation."""
+
+ @staticmethod
+ def generate_clean_data(filename, batch_size=1000, start_index=0):
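+        """Yield successive (x, y) batches of `batch_size` rows from the
+        HDF5 file at `filename`, starting from row `start_index`."""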
+ with h5py.File(filename, 'r') as hf:
+ i = start_index
+ while True:
+ data_x = hf['x'][i:i + batch_size]
+ data_y = hf['y'][i:i + batch_size]
+ i += batch_size
+ yield (data_x, data_y)
+
+ def create_clean_datafile(self, filename_in, filename_out, batch_size=1000,
+ x_window_size=100, y_window_size=1, y_col=0,
+ filter_cols=None, normalise=True):
+        """Incrementally save a datafile of clean data, ready for loading
+        straight into the model."""
+ print('> Creating x & y data files...')
+
+ data_gen = self.clean_data(
+ filename_in,
+ batch_size=batch_size,
+ x_window_size=x_window_size,
+ y_window_size=y_window_size,
+ y_col=y_col,
+ filter_cols=filter_cols,
+ normalise=normalise
+ )
+
+ i = 0
+ with h5py.File(filename_out, 'w') as hf:
+ x1, y1 = next(data_gen)
+ # Initialise hdf5 x, y datasets with first chunk of data
+ rcount_x = x1.shape[0]
+ dset_x = hf.create_dataset('x', shape=x1.shape, maxshape=(
+ None, x1.shape[1], x1.shape[2]), chunks=True)
+ dset_x[:] = x1
+ rcount_y = y1.shape[0]
+ dset_y = hf.create_dataset('y', shape=y1.shape, maxshape=(None,),
+ chunks=True)
+ dset_y[:] = y1
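+            # maxshape=(None, ...) above allows both datasets to be resized
+            # as further batches are appended below.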
+
+ for x_batch, y_batch in data_gen:
+ # Append batches to x, y hdf5 datasets
+ print('> Creating x & y data files | Batch:', i, end='\r')
+ dset_x.resize(rcount_x + x_batch.shape[0], axis=0)
+ dset_x[rcount_x:] = x_batch
+ rcount_x += x_batch.shape[0]
+ dset_y.resize(rcount_y + y_batch.shape[0], axis=0)
+ dset_y[rcount_y:] = y_batch
+ rcount_y += y_batch.shape[0]
+ i += 1
+
+ print('> Clean datasets created in file `' + filename_out + '`')
+
+ def clean_data(self, filepath, batch_size, x_window_size, y_window_size,
+ y_col, filter_cols, normalise):
+ """Cleans and Normalises the data in batches `batch_size` at a time"""
+ data = pd.read_csv(filepath, index_col=0)
+
+ if filter_cols:
+ # Remove any columns from data that we don't need by getting the
+ # difference between cols and filter list
+ rm_cols = set(data.columns) - set(filter_cols)
+ for col in rm_cols:
+ del data[col]
+
+ # Convert y-predict column name to numerical index
+ y_col = list(data.columns).index(y_col)
+
+ num_rows = len(data)
+ x_data = []
+ y_data = []
+ i = 0
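+        # Slide a window one row at a time: the first `x_window_size` rows form
+        # the model input and the next `y_window_size` rows form the target.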
+ while (i + x_window_size + y_window_size) <= num_rows:
+ x_window_data = data[i:(i + x_window_size)]
+ y_window_data = data[(i + x_window_size):(
+ i + x_window_size + y_window_size)]
+
+ # Remove any windows that contain NaN
+ if x_window_data.isnull().values.any() \
+ or y_window_data.isnull().values.any():
+ i += 1
+ continue
+
+ if normalise:
+ abs_base, x_window_data = self.zero_base_standardise(
+ x_window_data)
+ _, y_window_data = self.zero_base_standardise(y_window_data,
+ abs_base=abs_base)
+
+ # Average of the desired predictor y column
+ y_average = np.average(y_window_data.values[:, y_col])
+ x_data.append(x_window_data.values)
+ y_data.append(y_average)
+ i += 1
+
+ # Restrict yielding until we have enough in our batch. Then clear
+ # x, y data for next batch
+ if i % batch_size == 0:
+ # Convert from list to 3 dimensional numpy array [windows,
+ # window_val, val_dimension]
+ x_np_arr = np.array(x_data)
+ y_np_arr = np.array(y_data)
+ x_data = []
+ y_data = []
+ yield (x_np_arr, y_np_arr)
+
+ @staticmethod
+ def zero_base_standardise(data, abs_base=pd.DataFrame()):
+        """Standardise the dataframe to zero-based percentage returns from i=0"""
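+        # Each value becomes (value / base) - 1, where the base defaults to
+        # the first row of the window, so every window starts at zero.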
+        if abs_base.empty: abs_base = data.iloc[0]
+ data_standardised = (data / abs_base) - 1
+ return abs_base, data_standardised
+
+ @staticmethod
+ def min_max_normalise(data, data_min=pd.DataFrame(),
+ data_max=pd.DataFrame()):
+        """Normalise a Pandas dataframe using column-wise min-max
+        normalisation (custom min and max can be supplied if desired)."""
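+        # With the default min/max, each column is scaled to the [0, 1] range
+        # via (x - min) / (max - min).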
+ if data_min.empty: data_min = data.min()
+ if data_max.empty: data_max = data.max()
+ data_normalised = (data - data_min) / (data_max - data_min)
+ return data_min, data_max, data_normalised
diff --git a/lstm.py b/lstm.py
index 8fb5856..f003621 100644
--- a/lstm.py
+++ b/lstm.py
@@ -2,15 +2,15 @@
import time
import json
import warnings
-import numpy as np
-from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.models import load_model
-configs = json.loads(open(os.path.join(os.path.dirname(__file__), 'configs.json')).read())
-warnings.filterwarnings("ignore") #Hide messy Numpy warnings
+configs = json.loads(
+ open(os.path.join(os.path.dirname(__file__), 'configs.json')).read())
+warnings.filterwarnings("ignore") # Hide messy Numpy warnings
+
def build_network(layers):
model = Sequential()
@@ -38,10 +38,11 @@ def build_network(layers):
print("> Compilation Time : ", time.time() - start)
return model
+
def load_network(filename):
- #Load the h5 saved model and weights
- if(os.path.isfile(filename)):
+ # Load the h5 saved model and weights
+ if os.path.isfile(filename):
return load_model(filename)
else:
print('ERROR: "' + filename + '" file does not exist as a h5 model')
- return None
\ No newline at end of file
+ return None
diff --git a/run.py b/run.py
index 9a9f2d3..f7ec21a 100644
--- a/run.py
+++ b/run.py
@@ -1,54 +1,74 @@
import time
-import time
import threading
import lstm, etl, json
import numpy as np
-import pandas as pd
import h5py
import matplotlib.pyplot as plt
+
configs = json.loads(open('configs.json').read())
tstart = time.time()
+
+def get_dataset():
+ """
+ 1. Kaggle.com
+ 2. Register
+ 3. https://www.kaggle.com/mczielinski/bitcoin-historical-data/kernels
+ 4. Download. :)
+ :return:
+ """
+ pass
+
+
def plot_results(predicted_data, true_data):
- fig=plt.figure(figsize=(18, 12), dpi= 80, facecolor='w', edgecolor='k')
+ fig = plt.figure(figsize=(18, 12), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
ax.plot(true_data, label='True Data')
plt.plot(predicted_data, label='Prediction')
plt.legend()
plt.show()
+
def predict_sequences_multiple(model, data, window_size, prediction_len):
- #Predict sequence of 50 steps before shifting prediction run forward by 50 steps
+    # Predict a sequence of `prediction_len` steps, then shift the prediction
+    # run forward by `prediction_len` steps
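+    # Each prediction is fed back into the input frame (oldest step dropped),
+    # so later steps are predicted recursively from earlier predictions.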
prediction_seqs = []
- for i in range(int(len(data)/prediction_len)):
- curr_frame = data[i*prediction_len]
+ for i in range(int(len(data) / prediction_len)):
+ curr_frame = data[i * prediction_len]
predicted = []
for j in range(prediction_len):
- predicted.append(model.predict(curr_frame[np.newaxis,:,:])[0,0])
+ predicted.append(model.predict(curr_frame[np.newaxis, :, :])[0, 0])
curr_frame = curr_frame[1:]
- curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
+ curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1],
+ axis=0)
prediction_seqs.append(predicted)
return prediction_seqs
+
def plot_results_multiple(predicted_data, true_data, prediction_len):
- fig=plt.figure(figsize=(18, 12), dpi= 80, facecolor='w', edgecolor='k')
+ fig = plt.figure(figsize=(18, 12), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
ax.plot(true_data, label='True Data')
- #Pad the list of predictions to shift it in the graph to it's correct start
+    # Pad the list of predictions to shift it in the graph to its correct start
for i, data in enumerate(predicted_data):
padding = [None for p in range(i * prediction_len)]
plt.plot(padding + data, label='Prediction')
plt.legend()
plt.show()
-
+
+
true_values = []
+
+
def generator_strip_xy(data_gen, true_values):
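+    """Yield only the x batches from `data_gen`, collecting the corresponding
+    y values into `true_values` as a side effect."""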
- for x, y in data_gen_test:
+ for x, y in data_gen:
true_values += list(y)
yield x
-
+
+
def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs):
- """thread worker for model fitting - so it doesn't freeze on jupyter notebook"""
+    """Thread worker for model fitting, so training doesn't
+    freeze the Jupyter notebook."""
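+    # NB: the passed-in `model` is replaced by a network rebuilt inside this
+    # thread before fitting.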
model = lstm.build_network([ncols, 150, 150, 1])
model.fit_generator(
data_gen_train,
@@ -56,22 +76,25 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs):
epochs=configs['model']['epochs']
)
model.save(configs['model']['filename_model'])
- print('> Model Trained! Weights saved in', configs['model']['filename_model'])
+ print('> Model Trained! Weights saved in',
+ configs['model']['filename_model'])
return
+
dl = etl.ETL()
dl.create_clean_datafile(
- filename_in = configs['data']['filename'],
- filename_out = configs['data']['filename_clean'],
- batch_size = configs['data']['batch_size'],
- x_window_size = configs['data']['x_window_size'],
- y_window_size = configs['data']['y_window_size'],
- y_col = configs['data']['y_predict_column'],
- filter_cols = configs['data']['filter_columns'],
- normalise = True
+ filename_in=configs['data']['filename'],
+ filename_out=configs['data']['filename_clean'],
+ batch_size=configs['data']['batch_size'],
+ x_window_size=configs['data']['x_window_size'],
+ y_window_size=configs['data']['y_window_size'],
+ y_col=configs['data']['y_predict_column'],
+ filter_cols=configs['data']['filter_columns'],
+ normalise=True
)
-print('> Generating clean data from:', configs['data']['filename_clean'], 'with batch_size:', configs['data']['batch_size'])
+print('> Generating clean data from:', configs['data']['filename_clean'],
+ 'with batch_size:', configs['data']['batch_size'])
data_gen_train = dl.generate_clean_data(
configs['data']['filename_clean'],
@@ -81,13 +104,16 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs):
with h5py.File(configs['data']['filename_clean'], 'r') as hf:
nrows = hf['x'].shape[0]
ncols = hf['x'].shape[2]
-
+
ntrain = int(configs['data']['train_test_split'] * nrows)
-steps_per_epoch = int((ntrain / configs['model']['epochs']) / configs['data']['batch_size'])
-print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with', steps_per_epoch, 'steps-per-epoch')
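+# Spread the training rows across the epochs, then divide by the batch size to
+# get the number of generator steps per epoch.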
+steps_per_epoch = int(
+ (ntrain / configs['model']['epochs']) / configs['data']['batch_size'])
+print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with',
+ steps_per_epoch, 'steps-per-epoch')
model = lstm.build_network([ncols, 150, 150, 1])
-t = threading.Thread(target=fit_model_threaded, args=[model, data_gen_train, steps_per_epoch, configs])
+t = threading.Thread(target=fit_model_threaded,
+ args=[model, data_gen_train, steps_per_epoch, configs])
t.start()
data_gen_test = dl.generate_clean_data(
@@ -105,28 +131,29 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs):
steps=steps_test
)
-#Save our predictions
+# Save our predictions
with h5py.File(configs['model']['filename_predictions'], 'w') as hf:
dset_p = hf.create_dataset('predictions', data=predictions)
dset_y = hf.create_dataset('true_values', data=true_values)
-
+
plot_results(predictions[:800], true_values[:800])
-#Reload the data-generator
+# Reload the data-generator
data_gen_test = dl.generate_clean_data(
configs['data']['filename_clean'],
batch_size=800,
start_index=ntrain
)
data_x, true_values = next(data_gen_test)
-window_size = 50 #numer of steps to predict into the future
+window_size = 50  # number of steps to predict into the future
-#We are going to cheat a bit here and just take the next 400 steps from the testing generator and predict that data in its whole
+# We are going to cheat a bit here and just take the next 400 steps from the
+# testing generator and predict that data in its whole
predictions_multiple = predict_sequences_multiple(
model,
- data_x,
- data_x[0].shape[0],
- window_size
+ data=data_x,
+ window_size=data_x[0].shape[0],
+ prediction_len=window_size
)
-plot_results_multiple(predictions_multiple, true_values, window_size)
\ No newline at end of file
+plot_results_multiple(predictions_multiple, true_values, window_size)