From e54afde14db5576259cf80c6d2c8b9ceec05926a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikael=20Gro=CC=88n?= Date: Wed, 5 Sep 2018 11:56:27 +0200 Subject: [PATCH] Cleanup (more needed). Some .idea stuffs along for the ride too. --- .gitignore | 159 +++++++++++++++++++++++ .idea/other.xml | 6 + .idea/sbt.xml | 6 + .idea/vcs.xml | 6 + Bitcoin LSTM Prediction.ipynb | 4 +- etl.py | 233 ++++++++++++++++++---------------- lstm.py | 15 ++- run.py | 101 +++++++++------ 8 files changed, 378 insertions(+), 152 deletions(-) create mode 100644 .gitignore create mode 100644 .idea/other.xml create mode 100644 .idea/sbt.xml create mode 100644 .idea/vcs.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53fcebb --- /dev/null +++ b/.gitignore @@ -0,0 +1,159 @@ +# Ignore the data dir to save github space +data/ + +# python gitignore ------------------------------------- +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# osx gitignore ------------------------------------- +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# linux gitignore ------------------------------------- +*~ + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# Windows gitignore ------------------------------------- +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# Sublime gitignore ------------------------------------- +# cache files for sublime text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# workspace files are user-specific +*.sublime-workspace + +# project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using SublimeText +# *.sublime-project + +# sftp configuration file +sftp-config.json + +# vim gitignore ------------------------------------- +[._]*.s[a-w][a-z] +[._]s[a-w][a-z] +*.un~ +Session.vim +.netrwhist +*~ + +# Virtualenv gitignore ------------------------------------- +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +# JetBrains gitignore ------------------------------------- +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio + +*.iml + +## Directory-based project format: +.idea/ +# if you remove the above rule, at least ignore the following: + +# User-specific stuff: +.idea/workspace.xml +.idea/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/dataSources.ids +.idea/dataSources.xml +.idea/sqlDataSources.xml +.idea/dynamic.xml 
+.idea/uiDesigner.xml + +## File-based project format: +*.ipr +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..a708ec7 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/sbt.xml b/.idea/sbt.xml new file mode 100644 index 0000000..2018743 --- /dev/null +++ b/.idea/sbt.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Bitcoin LSTM Prediction.ipynb b/Bitcoin LSTM Prediction.ipynb index 36733ae..1095cb1 100644 --- a/Bitcoin LSTM Prediction.ipynb +++ b/Bitcoin LSTM Prediction.ipynb @@ -254,7 +254,6 @@ { "ename": "TypeError", "evalue": "Cannot interpret feed_dict key as Tensor: Tensor Tensor(\"lstm_1_input:0\", shape=(?, ?, 4), dtype=float32) is not an element of this graph.", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", @@ -274,7 +273,8 @@ "\u001b[0;32mC:\\ProgramData\\Anaconda2\\envs\\python3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 777\u001b[0m result = self._run(None, fetches, feed_dict, options_ptr,\n\u001b[0;32m--> 778\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[0;32mC:\\ProgramData\\Anaconda2\\envs\\python3\\lib\\site-packages\\tensorflow\\python\\client\\session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 931\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 932\u001b[0m raise TypeError('Cannot interpret feed_dict key as Tensor: '\n\u001b[0;32m--> 933\u001b[0;31m + e.args[0])\n\u001b[0m\u001b[1;32m 934\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 935\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msubfeed_val\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mops\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: Cannot interpret feed_dict key as Tensor: Tensor Tensor(\"lstm_1_input:0\", shape=(?, ?, 4), dtype=float32) is not an element of this graph." - ] + ], + "output_type": "error" } ], "source": [ diff --git a/etl.py b/etl.py index 2b4ceb1..2cdcc04 100644 --- a/etl.py +++ b/etl.py @@ -2,110 +2,131 @@ import numpy as np import pandas as pd + class ETL: - """Extract Transform Load class for all data operations pre model inputs. 
Data is read in generative way to allow for large datafiles and low memory utilisation""" - - def generate_clean_data(self, filename, batch_size=1000, start_index=0): - with h5py.File(filename, 'r') as hf: - i = start_index - while True: - data_x = hf['x'][i:i+batch_size] - data_y = hf['y'][i:i+batch_size] - i += batch_size - yield (data_x, data_y) - - def create_clean_datafile(self, filename_in, filename_out, batch_size=1000, x_window_size=100, y_window_size=1, y_col=0, filter_cols=None, normalise=True): - """Incrementally save a datafile of clean data ready for loading straight into model""" - print('> Creating x & y data files...') - - data_gen = self.clean_data( - filename_in, - batch_size = batch_size, - x_window_size = x_window_size, - y_window_size = y_window_size, - y_col = y_col, - filter_cols = filter_cols, - normalise = True - ) - - i = 0 - with h5py.File(filename_out, 'w') as hf: - x1, y1 = next(data_gen) - #Initialise hdf5 x, y datasets with first chunk of data - rcount_x = x1.shape[0] - dset_x = hf.create_dataset('x', shape=x1.shape, maxshape=(None, x1.shape[1], x1.shape[2]), chunks=True) - dset_x[:] = x1 - rcount_y = y1.shape[0] - dset_y = hf.create_dataset('y', shape=y1.shape, maxshape=(None,), chunks=True) - dset_y[:] = y1 - - for x_batch, y_batch in data_gen: - #Append batches to x, y hdf5 datasets - print('> Creating x & y data files | Batch:', i, end='\r') - dset_x.resize(rcount_x + x_batch.shape[0], axis=0) - dset_x[rcount_x:] = x_batch - rcount_x += x_batch.shape[0] - dset_y.resize(rcount_y + y_batch.shape[0], axis=0) - dset_y[rcount_y:] = y_batch - rcount_y += y_batch.shape[0] - i += 1 - - print('> Clean datasets created in file `' + filename_out + '.h5`') - - def clean_data(self, filepath, batch_size, x_window_size, y_window_size, y_col, filter_cols, normalise): - """Cleans and Normalises the data in batches `batch_size` at a time""" - data = pd.read_csv(filepath, index_col=0) - - if(filter_cols): - #Remove any columns from data that we don't need by getting the difference between cols and filter list - rm_cols = set(data.columns) - set(filter_cols) - for col in rm_cols: - del data[col] - - #Convert y-predict column name to numerical index - y_col = list(data.columns).index(y_col) - - num_rows = len(data) - x_data = [] - y_data = [] - i = 0 - while((i+x_window_size+y_window_size) <= num_rows): - x_window_data = data[i:(i+x_window_size)] - y_window_data = data[(i+x_window_size):(i+x_window_size+y_window_size)] - - #Remove any windows that contain NaN - if(x_window_data.isnull().values.any() or y_window_data.isnull().values.any()): - i += 1 - continue - - if(normalise): - abs_base, x_window_data = self.zero_base_standardise(x_window_data) - _, y_window_data = self.zero_base_standardise(y_window_data, abs_base=abs_base) - - #Average of the desired predicter y column - y_average = np.average(y_window_data.values[:, y_col]) - x_data.append(x_window_data.values) - y_data.append(y_average) - i += 1 - - #Restrict yielding until we have enough in our batch. 
Then clear x, y data for next batch - if(i % batch_size == 0): - #Convert from list to 3 dimensional numpy array [windows, window_val, val_dimension] - x_np_arr = np.array(x_data) - y_np_arr = np.array(y_data) - x_data = [] - y_data = [] - yield (x_np_arr, y_np_arr) - - def zero_base_standardise(self, data, abs_base=pd.DataFrame()): - """Standardise dataframe to be zero based percentage returns from i=0""" - if(abs_base.empty): abs_base = data.iloc[0] - data_standardised = (data/abs_base)-1 - return (abs_base, data_standardised) - - def min_max_normalise(self, data, data_min=pd.DataFrame(), data_max=pd.DataFrame()): - """Normalise a Pandas dataframe using column-wise min-max normalisation (can use custom min, max if desired)""" - if(data_min.empty): data_min = data.min() - if(data_max.empty): data_max = data.max() - data_normalised = (data-data_min)/(data_max-data_min) - return (data_min, data_max, data_normalised) \ No newline at end of file + """Extract Transform Load class for all data operations pre model inputs. + Data is read in generative way to allow for large datafiles and low + memory utilisation """ + + @staticmethod + def generate_clean_data(filename, batch_size=1000, start_index=0): + with h5py.File(filename, 'r') as hf: + i = start_index + while True: + data_x = hf['x'][i:i + batch_size] + data_y = hf['y'][i:i + batch_size] + i += batch_size + yield (data_x, data_y) + + def create_clean_datafile(self, filename_in, filename_out, batch_size=1000, + x_window_size=100, y_window_size=1, y_col=0, + filter_cols=None, normalise=True): + """Incrementally save a datafile of clean data ready for loading + straight into model """ + print('> Creating x & y data files...') + + data_gen = self.clean_data( + filename_in, + batch_size=batch_size, + x_window_size=x_window_size, + y_window_size=y_window_size, + y_col=y_col, + filter_cols=filter_cols, + normalise=normalise + ) + + i = 0 + with h5py.File(filename_out, 'w') as hf: + x1, y1 = next(data_gen) + # Initialise hdf5 x, y datasets with first chunk of data + rcount_x = x1.shape[0] + dset_x = hf.create_dataset('x', shape=x1.shape, maxshape=( + None, x1.shape[1], x1.shape[2]), chunks=True) + dset_x[:] = x1 + rcount_y = y1.shape[0] + dset_y = hf.create_dataset('y', shape=y1.shape, maxshape=(None,), + chunks=True) + dset_y[:] = y1 + + for x_batch, y_batch in data_gen: + # Append batches to x, y hdf5 datasets + print('> Creating x & y data files | Batch:', i, end='\r') + dset_x.resize(rcount_x + x_batch.shape[0], axis=0) + dset_x[rcount_x:] = x_batch + rcount_x += x_batch.shape[0] + dset_y.resize(rcount_y + y_batch.shape[0], axis=0) + dset_y[rcount_y:] = y_batch + rcount_y += y_batch.shape[0] + i += 1 + + print('> Clean datasets created in file `' + filename_out + '`') + + def clean_data(self, filepath, batch_size, x_window_size, y_window_size, + y_col, filter_cols, normalise): + """Cleans and Normalises the data in batches `batch_size` at a time""" + data = pd.read_csv(filepath, index_col=0) + + if filter_cols: + # Remove any columns from data that we don't need by getting the + # difference between cols and filter list + rm_cols = set(data.columns) - set(filter_cols) + for col in rm_cols: + del data[col] + + # Convert y-predict column name to numerical index + y_col = list(data.columns).index(y_col) + + num_rows = len(data) + x_data = [] + y_data = [] + i = 0 + while (i + x_window_size + y_window_size) <= num_rows: + x_window_data = data[i:(i + x_window_size)] + y_window_data = data[(i + x_window_size):( + i + x_window_size + y_window_size)] 
+ + # Remove any windows that contain NaN + if x_window_data.isnull().values.any() \ + or y_window_data.isnull().values.any(): + i += 1 + continue + + if normalise: + abs_base, x_window_data = self.zero_base_standardise( + x_window_data) + _, y_window_data = self.zero_base_standardise(y_window_data, + abs_base=abs_base) + + # Average of the desired predictor y column + y_average = np.average(y_window_data.values[:, y_col]) + x_data.append(x_window_data.values) + y_data.append(y_average) + i += 1 + + # Restrict yielding until we have enough in our batch. Then clear + # x, y data for next batch + if i % batch_size == 0: + # Convert from list to 3 dimensional numpy array [windows, + # window_val, val_dimension] + x_np_arr = np.array(x_data) + y_np_arr = np.array(y_data) + x_data = [] + y_data = [] + yield (x_np_arr, y_np_arr) + + @staticmethod + def zero_base_standardise(data, abs_base=pd.DataFrame()): + """Standardise dataframe to be zero based percentage returns from i=0""" + if (abs_base.empty): abs_base = data.iloc[0] + data_standardised = (data / abs_base) - 1 + return abs_base, data_standardised + + @staticmethod + def min_max_normalise(data, data_min=pd.DataFrame(), + data_max=pd.DataFrame()): + """Normalise a Pandas dataframe using column-wise min-max + normalisation (can use custom min, max if desired) """ + if data_min.empty: data_min = data.min() + if data_max.empty: data_max = data.max() + data_normalised = (data - data_min) / (data_max - data_min) + return data_min, data_max, data_normalised diff --git a/lstm.py b/lstm.py index 8fb5856..f003621 100644 --- a/lstm.py +++ b/lstm.py @@ -2,15 +2,15 @@ import time import json import warnings -import numpy as np -from numpy import newaxis from keras.layers.core import Dense, Activation, Dropout from keras.layers.recurrent import LSTM from keras.models import Sequential from keras.models import load_model -configs = json.loads(open(os.path.join(os.path.dirname(__file__), 'configs.json')).read()) -warnings.filterwarnings("ignore") #Hide messy Numpy warnings +configs = json.loads( + open(os.path.join(os.path.dirname(__file__), 'configs.json')).read()) +warnings.filterwarnings("ignore") # Hide messy Numpy warnings + def build_network(layers): model = Sequential() @@ -38,10 +38,11 @@ def build_network(layers): print("> Compilation Time : ", time.time() - start) return model + def load_network(filename): - #Load the h5 saved model and weights - if(os.path.isfile(filename)): + # Load the h5 saved model and weights + if os.path.isfile(filename): return load_model(filename) else: print('ERROR: "' + filename + '" file does not exist as a h5 model') - return None \ No newline at end of file + return None diff --git a/run.py b/run.py index 9a9f2d3..f7ec21a 100644 --- a/run.py +++ b/run.py @@ -1,54 +1,74 @@ import time -import time import threading import lstm, etl, json import numpy as np -import pandas as pd import h5py import matplotlib.pyplot as plt + configs = json.loads(open('configs.json').read()) tstart = time.time() + +def get_dataset(): + """ + 1. Kaggle.com + 2. Register + 3. https://www.kaggle.com/mczielinski/bitcoin-historical-data/kernels + 4. Download. 
:) + :return: + """ + pass + + def plot_results(predicted_data, true_data): - fig=plt.figure(figsize=(18, 12), dpi= 80, facecolor='w', edgecolor='k') + fig = plt.figure(figsize=(18, 12), dpi=80, facecolor='w', edgecolor='k') ax = fig.add_subplot(111) ax.plot(true_data, label='True Data') plt.plot(predicted_data, label='Prediction') plt.legend() plt.show() + def predict_sequences_multiple(model, data, window_size, prediction_len): - #Predict sequence of 50 steps before shifting prediction run forward by 50 steps + # Predict sequence of 50 steps before shifting prediction run forward by + # 50 steps prediction_seqs = [] - for i in range(int(len(data)/prediction_len)): - curr_frame = data[i*prediction_len] + for i in range(int(len(data) / prediction_len)): + curr_frame = data[i * prediction_len] predicted = [] for j in range(prediction_len): - predicted.append(model.predict(curr_frame[np.newaxis,:,:])[0,0]) + predicted.append(model.predict(curr_frame[np.newaxis, :, :])[0, 0]) curr_frame = curr_frame[1:] - curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0) + curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], + axis=0) prediction_seqs.append(predicted) return prediction_seqs + def plot_results_multiple(predicted_data, true_data, prediction_len): - fig=plt.figure(figsize=(18, 12), dpi= 80, facecolor='w', edgecolor='k') + fig = plt.figure(figsize=(18, 12), dpi=80, facecolor='w', edgecolor='k') ax = fig.add_subplot(111) ax.plot(true_data, label='True Data') - #Pad the list of predictions to shift it in the graph to it's correct start + # Pad the list of predictions to shift it in the graph to it's correct start for i, data in enumerate(predicted_data): padding = [None for p in range(i * prediction_len)] plt.plot(padding + data, label='Prediction') plt.legend() plt.show() - + + true_values = [] + + def generator_strip_xy(data_gen, true_values): - for x, y in data_gen_test: + for x, y in data_gen: true_values += list(y) yield x - + + def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs): - """thread worker for model fitting - so it doesn't freeze on jupyter notebook""" + """thread worker for model fitting - so it doesn't + freeze on jupyter notebook""" model = lstm.build_network([ncols, 150, 150, 1]) model.fit_generator( data_gen_train, @@ -56,22 +76,25 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs): epochs=configs['model']['epochs'] ) model.save(configs['model']['filename_model']) - print('> Model Trained! Weights saved in', configs['model']['filename_model']) + print('> Model Trained! 
Weights saved in', + configs['model']['filename_model']) return + dl = etl.ETL() dl.create_clean_datafile( - filename_in = configs['data']['filename'], - filename_out = configs['data']['filename_clean'], - batch_size = configs['data']['batch_size'], - x_window_size = configs['data']['x_window_size'], - y_window_size = configs['data']['y_window_size'], - y_col = configs['data']['y_predict_column'], - filter_cols = configs['data']['filter_columns'], - normalise = True + filename_in=configs['data']['filename'], + filename_out=configs['data']['filename_clean'], + batch_size=configs['data']['batch_size'], + x_window_size=configs['data']['x_window_size'], + y_window_size=configs['data']['y_window_size'], + y_col=configs['data']['y_predict_column'], + filter_cols=configs['data']['filter_columns'], + normalise=True ) -print('> Generating clean data from:', configs['data']['filename_clean'], 'with batch_size:', configs['data']['batch_size']) +print('> Generating clean data from:', configs['data']['filename_clean'], + 'with batch_size:', configs['data']['batch_size']) data_gen_train = dl.generate_clean_data( configs['data']['filename_clean'], @@ -81,13 +104,16 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs): with h5py.File(configs['data']['filename_clean'], 'r') as hf: nrows = hf['x'].shape[0] ncols = hf['x'].shape[2] - + ntrain = int(configs['data']['train_test_split'] * nrows) -steps_per_epoch = int((ntrain / configs['model']['epochs']) / configs['data']['batch_size']) -print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with', steps_per_epoch, 'steps-per-epoch') +steps_per_epoch = int( + (ntrain / configs['model']['epochs']) / configs['data']['batch_size']) +print('> Clean data has', nrows, 'data rows. Training on', ntrain, 'rows with', + steps_per_epoch, 'steps-per-epoch') model = lstm.build_network([ncols, 150, 150, 1]) -t = threading.Thread(target=fit_model_threaded, args=[model, data_gen_train, steps_per_epoch, configs]) +t = threading.Thread(target=fit_model_threaded, + args=[model, data_gen_train, steps_per_epoch, configs]) t.start() data_gen_test = dl.generate_clean_data( @@ -105,28 +131,29 @@ def fit_model_threaded(model, data_gen_train, steps_per_epoch, configs): steps=steps_test ) -#Save our predictions +# Save our predictions with h5py.File(configs['model']['filename_predictions'], 'w') as hf: dset_p = hf.create_dataset('predictions', data=predictions) dset_y = hf.create_dataset('true_values', data=true_values) - + plot_results(predictions[:800], true_values[:800]) -#Reload the data-generator +# Reload the data-generator data_gen_test = dl.generate_clean_data( configs['data']['filename_clean'], batch_size=800, start_index=ntrain ) data_x, true_values = next(data_gen_test) -window_size = 50 #numer of steps to predict into the future +window_size = 50 # numer of steps to predict into the future -#We are going to cheat a bit here and just take the next 400 steps from the testing generator and predict that data in its whole +# We are going to cheat a bit here and just take the next 400 steps from the +# testing generator and predict that data in its whole predictions_multiple = predict_sequences_multiple( model, - data_x, - data_x[0].shape[0], - window_size + data=data_x, + window_size=data_x[0].shape[0], + prediction_len=window_size ) -plot_results_multiple(predictions_multiple, true_values, window_size) \ No newline at end of file +plot_results_multiple(predictions_multiple, true_values, window_size)
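
Usage sketch (reviewer-supplied, not part of the applied diff): for anyone exercising the refactored etl.py and lstm.py modules outside of run.py, the same pipeline can be driven synchronously, without the training thread. The snippet below only uses calls that appear in this patch, and the configs.json keys are the ones run.py already references; the steps_per_epoch value is an arbitrary placeholder assumption, not the project's actual setting.

    import json
    import h5py

    import etl
    import lstm

    # Same configuration file that run.py reads; key names as referenced there.
    configs = json.loads(open('configs.json').read())

    # Build the cleaned, windowed HDF5 dataset from the raw Kaggle CSV.
    dl = etl.ETL()
    dl.create_clean_datafile(
        filename_in=configs['data']['filename'],
        filename_out=configs['data']['filename_clean'],
        batch_size=configs['data']['batch_size'],
        x_window_size=configs['data']['x_window_size'],
        y_window_size=configs['data']['y_window_size'],
        y_col=configs['data']['y_predict_column'],
        filter_cols=configs['data']['filter_columns'],
        normalise=True
    )

    # Inspect the dataset shape to size the input layer, as run.py does.
    with h5py.File(configs['data']['filename_clean'], 'r') as hf:
        ncols = hf['x'].shape[2]

    # Stream (x, y) batches from the cleaned file and fit the network
    # directly. steps_per_epoch=10 is a placeholder; run.py derives it
    # from the train/test split and batch size instead.
    data_gen = dl.generate_clean_data(
        configs['data']['filename_clean'],
        batch_size=configs['data']['batch_size']
    )
    model = lstm.build_network([ncols, 150, 150, 1])
    model.fit_generator(
        data_gen,
        steps_per_epoch=10,
        epochs=configs['model']['epochs']
    )
    model.save(configs['model']['filename_model'])

run.py itself additionally runs the fit on a background thread and then evaluates with predict_sequences_multiple / plot_results_multiple on the held-out portion of the cleaned data.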