From 44847b112c1017f446e59d661f274364da2ebd40 Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Sat, 25 Sep 2021 20:55:25 +0530
Subject: [PATCH 01/13] v0.4.0

changelog:
- added knn.py and test_knn.py modules
- added iris.csv dataset
---
 data/input/iris.csv | 151 +++++++++++++++++++++++++++++++++++++++++++
 mllib/lib/knn.py    | 153 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_knn.py   | 101 +++++++++++++++++++++++++++++
 3 files changed, 405 insertions(+)
 create mode 100644 data/input/iris.csv
 create mode 100644 mllib/lib/knn.py
 create mode 100644 tests/test_knn.py

diff --git a/data/input/iris.csv b/data/input/iris.csv
new file mode 100644
index 0000000..d93a29c
--- /dev/null
+++ b/data/input/iris.csv
@@ -0,0 +1,151 @@
+x3,x4,x1,x2,y
+5.1,3.5,1.4,0.2,0
+4.9,3.0,1.4,0.2,0
+4.7,3.2,1.3,0.2,0
+4.6,3.1,1.5,0.2,0
+5.0,3.6,1.4,0.2,0
+5.4,3.9,1.7,0.4,0
+4.6,3.4,1.4,0.3,0
+5.0,3.4,1.5,0.2,0
+4.4,2.9,1.4,0.2,0
+4.9,3.1,1.5,0.1,0
+5.4,3.7,1.5,0.2,0
+4.8,3.4,1.6,0.2,0
+4.8,3.0,1.4,0.1,0
+4.3,3.0,1.1,0.1,0
+5.8,4.0,1.2,0.2,0
+5.7,4.4,1.5,0.4,0
+5.4,3.9,1.3,0.4,0
+5.1,3.5,1.4,0.3,0
+5.7,3.8,1.7,0.3,0
+5.1,3.8,1.5,0.3,0
+5.4,3.4,1.7,0.2,0
+5.1,3.7,1.5,0.4,0
+4.6,3.6,1.0,0.2,0
+5.1,3.3,1.7,0.5,0
+4.8,3.4,1.9,0.2,0
+5.0,3.0,1.6,0.2,0
+5.0,3.4,1.6,0.4,0
+5.2,3.5,1.5,0.2,0
+5.2,3.4,1.4,0.2,0
+4.7,3.2,1.6,0.2,0
+4.8,3.1,1.6,0.2,0
+5.4,3.4,1.5,0.4,0
+5.2,4.1,1.5,0.1,0
+5.5,4.2,1.4,0.2,0
+4.9,3.1,1.5,0.2,0
+5.0,3.2,1.2,0.2,0
+5.5,3.5,1.3,0.2,0
+4.9,3.6,1.4,0.1,0
+4.4,3.0,1.3,0.2,0
+5.1,3.4,1.5,0.2,0
+5.0,3.5,1.3,0.3,0
+4.5,2.3,1.3,0.3,0
+4.4,3.2,1.3,0.2,0
+5.0,3.5,1.6,0.6,0
+5.1,3.8,1.9,0.4,0
+4.8,3.0,1.4,0.3,0
+5.1,3.8,1.6,0.2,0
+4.6,3.2,1.4,0.2,0
+5.3,3.7,1.5,0.2,0
+5.0,3.3,1.4,0.2,0
+7.0,3.2,4.7,1.4,1
+6.4,3.2,4.5,1.5,1
+6.9,3.1,4.9,1.5,1
+5.5,2.3,4.0,1.3,1
+6.5,2.8,4.6,1.5,1
+5.7,2.8,4.5,1.3,1
+6.3,3.3,4.7,1.6,1
+4.9,2.4,3.3,1.0,1
+6.6,2.9,4.6,1.3,1
+5.2,2.7,3.9,1.4,1
+5.0,2.0,3.5,1.0,1
+5.9,3.0,4.2,1.5,1
+6.0,2.2,4.0,1.0,1
+6.1,2.9,4.7,1.4,1
+5.6,2.9,3.6,1.3,1
+6.7,3.1,4.4,1.4,1
+5.6,3.0,4.5,1.5,1
+5.8,2.7,4.1,1.0,1
+6.2,2.2,4.5,1.5,1
+5.6,2.5,3.9,1.1,1
+5.9,3.2,4.8,1.8,1
+6.1,2.8,4.0,1.3,1
+6.3,2.5,4.9,1.5,1
+6.1,2.8,4.7,1.2,1
+6.4,2.9,4.3,1.3,1
+6.6,3.0,4.4,1.4,1
+6.8,2.8,4.8,1.4,1
+6.7,3.0,5.0,1.7,1
+6.0,2.9,4.5,1.5,1
+5.7,2.6,3.5,1.0,1
+5.5,2.4,3.8,1.1,1
+5.5,2.4,3.7,1.0,1
+5.8,2.7,3.9,1.2,1
+6.0,2.7,5.1,1.6,1
+5.4,3.0,4.5,1.5,1
+6.0,3.4,4.5,1.6,1
+6.7,3.1,4.7,1.5,1
+6.3,2.3,4.4,1.3,1
+5.6,3.0,4.1,1.3,1
+5.5,2.5,4.0,1.3,1
+5.5,2.6,4.4,1.2,1
+6.1,3.0,4.6,1.4,1
+5.8,2.6,4.0,1.2,1
+5.0,2.3,3.3,1.0,1
+5.6,2.7,4.2,1.3,1
+5.7,3.0,4.2,1.2,1
+5.7,2.9,4.2,1.3,1
+6.2,2.9,4.3,1.3,1
+5.1,2.5,3.0,1.1,1
+5.7,2.8,4.1,1.3,1
+6.3,3.3,6.0,2.5,2
+5.8,2.7,5.1,1.9,2
+7.1,3.0,5.9,2.1,2
+6.3,2.9,5.6,1.8,2
+6.5,3.0,5.8,2.2,2
+7.6,3.0,6.6,2.1,2
+4.9,2.5,4.5,1.7,2
+7.3,2.9,6.3,1.8,2
+6.7,2.5,5.8,1.8,2
+7.2,3.6,6.1,2.5,2
+6.5,3.2,5.1,2.0,2
+6.4,2.7,5.3,1.9,2
+6.8,3.0,5.5,2.1,2
+5.7,2.5,5.0,2.0,2
+5.8,2.8,5.1,2.4,2
+6.4,3.2,5.3,2.3,2
+6.5,3.0,5.5,1.8,2
+7.7,3.8,6.7,2.2,2
+7.7,2.6,6.9,2.3,2
+6.0,2.2,5.0,1.5,2
+6.9,3.2,5.7,2.3,2
+5.6,2.8,4.9,2.0,2
+7.7,2.8,6.7,2.0,2
+6.3,2.7,4.9,1.8,2
+6.7,3.3,5.7,2.1,2
+7.2,3.2,6.0,1.8,2
+6.2,2.8,4.8,1.8,2
+6.1,3.0,4.9,1.8,2
+6.4,2.8,5.6,2.1,2
+7.2,3.0,5.8,1.6,2
+7.4,2.8,6.1,1.9,2
+7.9,3.8,6.4,2.0,2
+6.4,2.8,5.6,2.2,2
+6.3,2.8,5.1,1.5,2
+6.1,2.6,5.6,1.4,2
+7.7,3.0,6.1,2.3,2
+6.3,3.4,5.6,2.4,2
+6.4,3.1,5.5,1.8,2
+6.0,3.0,4.8,1.8,2
+6.9,3.1,5.4,2.1,2
+6.7,3.1,5.6,2.4,2
+6.9,3.1,5.1,2.3,2
+5.8,2.7,5.1,1.9,2
+6.8,3.2,5.9,2.3,2
+6.7,3.3,5.7,2.5,2
+6.7,3.0,5.2,2.3,2
+6.3,2.5,5.0,1.9,2
+6.5,3.0,5.2,2.0,2
+6.2,3.4,5.4,2.3,2
+5.9,3.0,5.1,1.8,2
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
new file mode 100644
index 0000000..855c6bb
--- /dev/null
+++ b/mllib/lib/knn.py
@@ -0,0 +1,153 @@
+"""
+Module for commonly used machine learning modelling algorithms.
+
+**Available routines:**
+
+- class ``Knn``: Builds K-Nearest Neighnour model sing cross validation.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 25, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=too-many-arguments
+# pylint: disable=too-few-public-methods
+
+from typing import List, Dict, Any
+
+import re
+import sys
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+
+from sklearn import neighbors as sn
+from sklearn.preprocessing import scale
+from sklearn.model_selection import GridSearchCV
+
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
+sys.path.insert(0, path)
+
+class Knn():
+    """ K-Nearest Neighbour (KNN) module.
+
+    Objective:
+    - Build KNN model and determine optimal k
+
+    Parameters
+    ----------
+    :df: pandas.DataFrame
+
+        Pandas dataframe containing the `y_var` and `x_var`
+
+    :y_var: str
+
+        Target variable
+
+    :x_var: list
+
+        List containing independant variables
+
+    :method: str, optional
+
+        Can be either `classify` or `regression` (default is 'classify')
+
+    :k_fold: int, optional
+
+        Number of cross validations folds (default is 5)
+
+    :param: dict, optional
+
+        KNN parameters (the default is None).
+        In case of None, the parameters will default to::
+
+            n_neighbors: range(1, max(int(len(df)/(k_fold * 2)), 1), 2)
+            weights: ["uniform", "distance"]
+            metric: ["euclidean", "manhattan"]
+
+    Methods
+    -------
+    predict
+
+    Example
+    -------
+    >>> mod = Knn(df=df_ip, y_var=["y"], x_var=["x1", "x2", "x3"])
+    >>> df_op = mod.predict(df_predict)
+
+    """
+
+    def __init__(self,
+                 df: pd.DataFrame,
+                 y_var: str,
+                 x_var: List[str],
+                 method: str = "classify",
+                 k_fold: int = 5,
+                 param: Dict = None):
+        """Initialize variables for module ``Knn``."""
+        self.df = df.reset_index(drop=True)
+        self.y_var = y_var
+        self.x_var = x_var
+        self.method = method
+        self.model = None
+        self.k_fold = k_fold
+        if param is None:
+            max_k = max(int(len(self.df)/(self.k_fold * 2)), 1)
+            param = {"n_neighbors": list(range(1, max_k, 2)),
+                     "weights": ["uniform", "distance"],
+                     "metric": ["euclidean", "manhattan"]}
+        self.param = param
+        self._pre_process()
+        self._fit()
+
+    def _pre_process(self):
+        """Pre-process the data, one hot encoding and scaling."""
+        df_ip_x = pd.get_dummies(self.df[self.x_var])
+        self.x_var = list(df_ip_x.columns)
+        df_ip_x = pd.DataFrame(scale(df_ip_x))
+        df_ip_x.columns = self.x_var
+        self.df = self.df[[self.y_var]].join(df_ip_x)
+
+    def _fit(self) -> Dict[str, Any]:
+        """Fit KNN model."""
+        if self.method == "classify":
+            gs = GridSearchCV(sn.KNeighborsClassifier(),
+                              self.param,
+                              verbose=0,
+                              cv=self.k_fold,
+                              n_jobs=1)
+        elif self.method == "regression":
+            gs = GridSearchCV(sn.KNeighborsRegressor(),
+                              self.param,
+                              verbose=0,
+                              cv=self.k_fold,
+                              n_jobs=1)
+        gs_op = gs.fit(self.df[self.x_var],
+                       self.df[self.y_var])
+        opt_k = gs_op.best_params_.get("n_neighbors")
+        weight = gs_op.best_params_.get("weights")
+        metric = gs_op.best_params_.get("metric")
+        if self.method == "classify":
+            model = sn.KNeighborsClassifier(n_neighbors=opt_k,
+                                            weights=weight,
+                                            metric=metric)
+        elif self.method == "regression":
+            model = sn.KNeighborsRegressor(n_neighbors=opt_k,
+                                           weights=weight,
+                                           metric=metric)
+        self.model = model.fit(self.df[self.x_var],
+                               self.df[self.y_var])
+        return gs_op.best_params_
+
+    def predict(self, x_pred: pd.DataFrame) -> pd.DataFrame:
+        """Prediction module."""
+        x_pred = pd.DataFrame(scale(pd.get_dummies(x_pred)))
+        return self.model.predict(x_pred)
diff --git a/tests/test_knn.py b/tests/test_knn.py
new file mode 100644
index 0000000..2813cbf
--- /dev/null
+++ b/tests/test_knn.py
@@ -0,0 +1,101 @@
+"""
+Test suite module for ``knn``.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 25, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=wrong-import-position
+
+import unittest
+import warnings
+import re
+import sys
+
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+
+from sklearn.model_selection import train_test_split as split
+
+# Set base path
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+)(\/tests.*)", "\\1", path)
+
+sys.path.insert(0, path)
+
+from mllib.lib.knn import Knn  # noqa: F841
+
+# =============================================================================
+# --- DO NOT CHANGE ANYTHING FROM HERE
+# =============================================================================
+
+path = path + "/data/input/"
+
+# =============================================================================
+# --- User defined functions
+# =============================================================================
+
+
+def ignore_warnings(test_func):
+    """Suppress all warnings raised while the wrapped test runs."""
+
+    def do_test(self, *args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            test_func(self, *args, **kwargs)
+    return do_test
+
+
+class Test_Knn(unittest.TestCase):
+    """Test suite for module ``KNN``."""
+
+    def setUp(self):
+        """Set up for module ``KNN``."""
+
+    def test_knn_class(self):
+        """ Test KNN classification.
+        """
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[["y", "x1", "x2"]]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip["y"],
+                                  test_size=0.1,
+                                  random_state=42)
+        mod = Knn(df_train, "y", ["x1", "x2"], method="classify")
+        y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+        y = df_test["y"].values.tolist()
+        acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
+        self.assertGreaterEqual(acc, 0.93)
+
+    def test_knn_reg(self):
+        """ Test KNN regression.
+        """
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[["y", "x1", "x2"]]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip["y"],
+                                  test_size=0.1,
+                                  random_state=42)
+        mod = Knn(df_train, "y", ["x1", "x2"], method="regression")
+        y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+        y = df_test["y"].values.tolist()
+        acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
+        self.assertGreaterEqual(acc, 0.87)
+
+
+# =============================================================================
+# --- Main
+# =============================================================================
+
+if __name__ == '__main__':
+    unittest.main()

From e0c4f237fd9a32c86618258cf358b60b438421bc Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sat, 25 Sep 2021 21:45:10 +0530
Subject: [PATCH 02/13] v0.4.0

changelog:
- added parallel processing by default in KNN module
---
 mllib/__main__.py  | 19 +++++++++++++++++--
 mllib/lib/knn.py   | 47 +++++++++++++++++++++++++++-------------------
 mllib/lib/model.py |  2 +-
 tests/test_knn.py  | 12 +++++-------
 4 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/mllib/__main__.py b/mllib/__main__.py
index 2fbd285..7875e9e 100644
--- a/mllib/__main__.py
+++ b/mllib/__main__.py
@@ -28,6 +28,7 @@
 from lib import cfg, utils  # noqa: F841
 from lib.cluster import Cluster  # noqa: F841
 from lib.model import GLMNet  # noqa: F841
+from lib.knn import KNN  # noqa: F841
 
 # =============================================================================
 # --- DO NOT CHANGE ANYTHING FROM HERE
@@ -52,13 +53,13 @@
 CLI.add_argument("-f", "--filename",
                  nargs=1,
                  type=str,
-                 default=["store.csv"],
+                 default=["iris.csv"],
                  help="input csv filename")
 
 args = CLI.parse_args()
 
 fn_ip = args.filename[0]
-fn_ip = "store.csv"
+fn_ip = "iris.csv"
 
 # =============================================================================
 # --- Main
@@ -84,6 +85,20 @@
     print("\nGLMNet\n")
     for k, v in glm_mod.model_summary.items():
         print(k, str(v).rjust(69 - len(k)))
+    print(elapsed_time("Time", start_t),
+          sep="\n")
+    # --- KNN
+    start_t = time.time_ns()
+    df_ip = pd.read_csv(path + "input/iris.csv")
+    df_ip = df_ip[["y", "x1", "x2"]]
+    df_train = df_ip.sample(frac=0.8, random_state=42)
+    df_test = df_ip.drop(df_train.index)
+    mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
+    print("\nKNN\n")
+    y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+    y = df_test["y"].values.tolist()
+    accuracy = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
+    print("Accuracy:", accuracy)
     print(elapsed_time("Time", start_t),
           sep="\n")
     # --- EOF
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 855c6bb..36c7bc0 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -1,9 +1,9 @@
 """
-Module for commonly used machine learning modelling algorithms.
+k-NN module.
 
 **Available routines:**
 
-- class ``Knn``: Builds K-Nearest Neighnour model sing cross validation.
+- class ``KNN``: Builds K-Nearest Neighbour model using cross validation.
 
 Credits
 -------
@@ -37,35 +37,38 @@
 path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
 sys.path.insert(0, path)
 
-class Knn():
-    """ K-Nearest Neighbour (KNN) module.
+
+class KNN():
+    """K-Nearest Neighbour (KNN) module.
 
     Objective:
-    - Build KNN model and determine optimal k
+        - Build
+          `KNN <https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm>`_
+          model and determine optimal k
 
     Parameters
     ----------
-    :df: pandas.DataFrame
+    df : pandas.DataFrame
 
         Pandas dataframe containing the `y_var` and `x_var`
 
-    :y_var: str
+    y_var : str
 
-        Target variable
+        Dependent variable
 
-    :x_var: list
+    x_var : List[str]
 
-        List containing independant variables
+        Independent variables.
 
-    :method: str, optional
+    method : str, optional
 
-        Can be either `classify` or `regression` (default is 'classify')
+        Can be either `classify` or `regression` (the default is classify)
 
-    :k_fold: int, optional
+    k_fold : int, optional
 
-        Number of cross validations folds (default is 5)
+        Number of cross-validation folds (the default is 5)
 
-    :param: dict, optional
+    param : dict, optional
 
         KNN parameters (the default is None).
         In case of None, the parameters will default to::
@@ -74,13 +77,19 @@ class Knn():
             weights: ["uniform", "distance"]
             metric: ["euclidean", "manhattan"]
 
+    Returns
+    -------
+    model : object
+
+        Final optimal model.
+
     Methods
     -------
     predict
 
     Example
     -------
-    >>> mod = Knn(df=df_ip, y_var=["y"], x_var=["x1", "x2", "x3"])
+    >>> mod = KNN(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
     >>> df_op = mod.predict(df_predict)
 
     """
@@ -92,7 +101,7 @@ def __init__(self,
                  method: str = "classify",
                  k_fold: int = 5,
                  param: Dict = None):
-        """Initialize variables for module ``Knn``."""
+        """Initialize variables for module ``KNN``."""
         self.df = df.reset_index(drop=True)
         self.y_var = y_var
         self.x_var = x_var
@@ -123,13 +132,13 @@ def _fit(self) -> Dict[str, Any]:
                               self.param,
                               verbose=0,
                               cv=self.k_fold,
-                              n_jobs=1)
+                              n_jobs=-1)
         elif self.method == "regression":
             gs = GridSearchCV(sn.KNeighborsRegressor(),
                               self.param,
                               verbose=0,
                               cv=self.k_fold,
-                              n_jobs=1)
+                              n_jobs=-1)
         gs_op = gs.fit(self.df[self.x_var],
                        self.df[self.y_var])
         opt_k = gs_op.best_params_.get("n_neighbors")
diff --git a/mllib/lib/model.py b/mllib/lib/model.py
index a90ebcf..e8adf2a 100644
--- a/mllib/lib/model.py
+++ b/mllib/lib/model.py
@@ -1,5 +1,5 @@
 """
-Module for commonly used machine learning modelling algorithms.
+GLMNet module.
 
 **Available routines:**
 
diff --git a/tests/test_knn.py b/tests/test_knn.py
index 2813cbf..b6a55c8 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -33,7 +33,7 @@
 
 sys.path.insert(0, path)
 
-from mllib.lib.knn import Knn  # noqa: F841
+from mllib.lib.knn import KNN  # noqa: F841
 
 # =============================================================================
 # --- DO NOT CHANGE ANYTHING FROM HERE
@@ -63,30 +63,28 @@ def setUp(self):
         """Set up for module ``KNN``."""
 
     def test_knn_class(self):
-        """ Test KNN classification.
-        """
+        """KNN: Test for classification."""
         df_ip = pd.read_csv(path + "iris.csv")
         df_ip = df_ip[["y", "x1", "x2"]]
         df_train, df_test = split(df_ip,
                                   stratify=df_ip["y"],
                                   test_size=0.1,
                                   random_state=42)
-        mod = Knn(df_train, "y", ["x1", "x2"], method="classify")
+        mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
         y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
         y = df_test["y"].values.tolist()
         acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
         self.assertGreaterEqual(acc, 0.93)
 
     def test_knn_reg(self):
-        """ Test KNN regression.
-        """
+        """KNN: Test for regression."""
         df_ip = pd.read_csv(path + "iris.csv")
         df_ip = df_ip[["y", "x1", "x2"]]
         df_train, df_test = split(df_ip,
                                   stratify=df_ip["y"],
                                   test_size=0.1,
                                   random_state=42)
-        mod = Knn(df_train, "y", ["x1", "x2"], method="regression")
+        mod = KNN(df_train, "y", ["x1", "x2"], method="regression")
         y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
         y = df_test["y"].values.tolist()
         acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)

From d28f86978d879f96744b2305a1b6ae193ef48a0c Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sat, 25 Sep 2021 21:47:36 +0530
Subject: [PATCH 03/13] v0.4.0

---
 logs/cov.out                      | 17 +++++++++--------
 logs/pip.out                      |  2 +-
 logs/pylint/lib-knn-py.out        |  4 ++++
 logs/pylint/tests-test_knn-py.out |  4 ++++
 requirements.txt                  |  2 +-
 5 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 logs/pylint/lib-knn-py.out
 create mode 100644 logs/pylint/tests-test_knn-py.out

diff --git a/logs/cov.out b/logs/cov.out
index 5aceeff..6b2ba28 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,8 +1,9 @@
-Name                    Stmts   Miss  Cover   Missing
------------------------------------------------------
-mllib/__init__.py           7      0   100%
-mllib/lib/__init__.py       7      0   100%
-mllib/lib/cluster.py      103      0   100%
-mllib/lib/model.py         45      0   100%
------------------------------------------------------
-TOTAL                     162      0   100%
+Name                                                        Stmts   Miss  Cover   Missing
+-----------------------------------------------------------------------------------------
+/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           50      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         45      0   100%
+-----------------------------------------------------------------------------------------
+TOTAL                                                         212      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index 03fb79a..f61bf91 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-./bin/run_tests.sh: line 78: pipreqs: command not found
+INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
new file mode 100644
index 0000000..b96d3cd
--- /dev/null
+++ b/logs/pylint/lib-knn-py.out
@@ -0,0 +1,4 @@
+
+------------------------------------
+Your code has been rated at 10.00/10
+
diff --git a/logs/pylint/tests-test_knn-py.out b/logs/pylint/tests-test_knn-py.out
new file mode 100644
index 0000000..b96d3cd
--- /dev/null
+++ b/logs/pylint/tests-test_knn-py.out
@@ -0,0 +1,4 @@
+
+------------------------------------
+Your code has been rated at 10.00/10
+
diff --git a/requirements.txt b/requirements.txt
index b593d70..45ef809 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy==1.19.5
 Cython==0.29.15
 pandas==1.1.3
+numpy==1.19.5
 scikit_learn==1.0

From 0f63e4314d0aa0003798f9f722e660d6be462ee6 Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Sun, 26 Sep 2021 00:30:00 +0530
Subject: [PATCH 04/13] v0.4.0

changelog:
- _compute_metrics method added
---
 logs/cov.out                      | 18 ++++-----
 logs/pip.out                      |  2 +-
 logs/pylint/lib-knn-py.out        |  9 ++++-
 logs/pylint/tests-test_knn-py.out |  4 +-
 mllib/lib/knn.py                  | 65 ++++++++++++++++++++++++++-----
 tests/test_knn.py                 | 11 +++---
 6 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/logs/cov.out b/logs/cov.out
index 6b2ba28..457d257 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,9 @@
-Name                                                        Stmts   Miss  Cover   Missing
------------------------------------------------------------------------------------------
-/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           50      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         45      0   100%
------------------------------------------------------------------------------------------
-TOTAL                                                         212      0   100%
+Name                    Stmts   Miss  Cover   Missing
+-----------------------------------------------------
+mllib/__init__.py           7      0   100%
+mllib/lib/__init__.py       7      0   100%
+mllib/lib/cluster.py      103      0   100%
+mllib/lib/knn.py           67      0   100%
+mllib/lib/model.py         45      0   100%
+-----------------------------------------------------
+TOTAL                     229      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index f61bf91..03fb79a 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
+./bin/run_tests.sh: line 78: pipreqs: command not found
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
index b96d3cd..840218f 100644
--- a/logs/pylint/lib-knn-py.out
+++ b/logs/pylint/lib-knn-py.out
@@ -1,4 +1,9 @@
+************* Module mllib.lib.knn
+knn.py:174:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:175:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:176:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:177:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
 
-------------------------------------
-Your code has been rated at 10.00/10
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
 
diff --git a/logs/pylint/tests-test_knn-py.out b/logs/pylint/tests-test_knn-py.out
index b96d3cd..d7495ee 100644
--- a/logs/pylint/tests-test_knn-py.out
+++ b/logs/pylint/tests-test_knn-py.out
@@ -1,4 +1,4 @@
 
-------------------------------------
-Your code has been rated at 10.00/10
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
 
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 36c7bc0..b7d037c 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -19,6 +19,8 @@
 # pylint: disable=invalid-name
 # pylint: disable=too-many-arguments
 # pylint: disable=too-few-public-methods
+# pylint: disable=R0902
+# pylint: disable=wrong-import-position
 
 from typing import List, Dict, Any
 
@@ -28,15 +30,19 @@
 from os.path import abspath
 
 import pandas as pd
+import numpy as np
 
 from sklearn import neighbors as sn
 from sklearn.preprocessing import scale
+from sklearn import metrics as sk_metrics
+
 from sklearn.model_selection import GridSearchCV
 
 path = abspath(getsourcefile(lambda: 0))
 path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
 sys.path.insert(0, path)
 
+import metrics  # noqa: F841
 
 class KNN():
     """K-Nearest Neighbour (KNN) module.
@@ -102,9 +108,9 @@ def __init__(self,
                  k_fold: int = 5,
                  param: Dict = None):
         """Initialize variables for module ``KNN``."""
-        self.df = df.reset_index(drop=True)
         self.y_var = y_var
         self.x_var = x_var
+        self.df = df[[self.y_var] + self.x_var].reset_index(drop=True)
         self.method = method
         self.model = None
         self.k_fold = k_fold
@@ -115,7 +121,9 @@ def __init__(self,
                      "metric": ["euclidean", "manhattan"]}
         self.param = param
         self._pre_process()
-        self._fit()
+        self.best_params_ = self._fit()
+        self.model_summary = None
+        self._compute_metrics()
 
     def _pre_process(self):
         """Pre-process the data, one hot encoding and scaling."""
@@ -128,14 +136,16 @@ def _pre_process(self):
     def _fit(self) -> Dict[str, Any]:
         """Fit KNN model."""
         if self.method == "classify":
-            gs = GridSearchCV(sn.KNeighborsClassifier(),
-                              self.param,
+            gs = GridSearchCV(estimator=sn.KNeighborsClassifier(),
+                              param_grid=self.param,
+                              scoring='accuracy',
                               verbose=0,
                               cv=self.k_fold,
                               n_jobs=-1)
         elif self.method == "regression":
-            gs = GridSearchCV(sn.KNeighborsRegressor(),
-                              self.param,
+            gs = GridSearchCV(estimator=sn.KNeighborsRegressor(),
+                              param_grid=self.param,
+                              scoring='neg_root_mean_squared_error',
                               verbose=0,
                               cv=self.k_fold,
                               n_jobs=-1)
@@ -156,7 +166,42 @@ def _fit(self) -> Dict[str, Any]:
                                self.df[self.y_var])
         return gs_op.best_params_
 
-    def predict(self, x_pred: pd.DataFrame) -> pd.DataFrame:
-        """Prediction module."""
-        x_pred = pd.DataFrame(scale(pd.get_dummies(x_pred)))
-        return self.model.predict(x_pred)
+    def _compute_metrics(self):
+        """Compute commonly used metrics to evaluate the model."""
+        y = self.df.iloc[:, 0].values.tolist()
+        y_hat = list(self.predict(self.df[self.x_var])["y"].values)
+        if self.method == "regression":
+            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
+                             "mae": np.round(metrics.mae(y, y_hat), 3),
+                             "mape": np.round(metrics.mape(y, y_hat), 3),
+                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
+            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
+        if self.method == "classify":
+            model_summary = {"acc": np.round(\
+                                     sk_metrics.accuracy_score(y, y_hat), 3),
+                             "f1": np.round(\
+                                     sk_metrics.f1_score(y,
+                                                         y_hat,
+                                                         average='micro'), 3)}
+        self.model_summary = model_summary
+
+    def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
+        """Predict y_var/target variable.
+
+        Parameters
+        ----------
+        df_predict : pd.DataFrame
+
+            Pandas dataframe containing `x_var`.
+
+        Returns
+        -------
+        pd.DataFrame
+
+            Pandas dataframe containing predicted `y_var` and `x_var`.
+        """
+        df_predict = pd.DataFrame(scale(pd.get_dummies(df_predict)))
+        y_hat = self.model.predict(df_predict)
+        df_predict = df_predict.copy()
+        df_predict["y"] = y_hat
+        return df_predict
diff --git a/tests/test_knn.py b/tests/test_knn.py
index b6a55c8..ae2f2ce 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -26,6 +26,7 @@
 import pandas as pd
 
 from sklearn.model_selection import train_test_split as split
+from sklearn import metrics as sk_metrics
 
 # Set base path
 path = abspath(getsourcefile(lambda: 0))
@@ -71,9 +72,9 @@ def test_knn_class(self):
                                   test_size=0.1,
                                   random_state=42)
         mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
-        y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+        y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
         y = df_test["y"].values.tolist()
-        acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
+        acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
         self.assertGreaterEqual(acc, 0.93)
 
     def test_knn_reg(self):
@@ -85,10 +86,10 @@ def test_knn_reg(self):
                                   test_size=0.1,
                                   random_state=42)
         mod = KNN(df_train, "y", ["x1", "x2"], method="regression")
-        y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+        y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
         y = df_test["y"].values.tolist()
-        acc = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
-        self.assertGreaterEqual(acc, 0.87)
+        acc = round(sk_metrics.mean_squared_error(y, y_hat), 2)
+        self.assertLessEqual(acc, 0.1)
 
 
 # =============================================================================

From 69436d313d2e3544d90d5d41a5b465bea3e2c48f Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sun, 26 Sep 2021 01:00:09 +0530
Subject: [PATCH 05/13] v0.4.0

changelog:
- minor changes in variable names
---
 logs/cov.out               | 18 +++++++++---------
 logs/pip.out               |  2 +-
 logs/pylint/lib-knn-py.out | 12 ++++++------
 mllib/lib/knn.py           | 13 +++++++------
 requirements.txt           |  2 +-
 5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/logs/cov.out b/logs/cov.out
index 457d257..cc34ded 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,9 @@
-Name                    Stmts   Miss  Cover   Missing
------------------------------------------------------
-mllib/__init__.py           7      0   100%
-mllib/lib/__init__.py       7      0   100%
-mllib/lib/cluster.py      103      0   100%
-mllib/lib/knn.py           67      0   100%
-mllib/lib/model.py         45      0   100%
------------------------------------------------------
-TOTAL                     229      0   100%
+Name                                                        Stmts   Miss  Cover   Missing
+-----------------------------------------------------------------------------------------
+/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           69      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         45      0   100%
+-----------------------------------------------------------------------------------------
+TOTAL                                                         231      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index 03fb79a..f61bf91 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-./bin/run_tests.sh: line 78: pipreqs: command not found
+INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
index 840218f..48851ad 100644
--- a/logs/pylint/lib-knn-py.out
+++ b/logs/pylint/lib-knn-py.out
@@ -1,9 +1,9 @@
 ************* Module mllib.lib.knn
-knn.py:174:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:175:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:176:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:177:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:175:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:176:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:177:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:178:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
 
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+-------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 9.70/10, +0.30)
 
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index b7d037c..8d19d7c 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -44,6 +44,7 @@
 
 import metrics  # noqa: F841
 
+
 class KNN():
     """K-Nearest Neighbour (KNN) module.
 
@@ -177,12 +178,11 @@ def _compute_metrics(self):
                              "rmse": np.round(metrics.rmse(y, y_hat), 3)}
             model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
         if self.method == "classify":
-            model_summary = {"acc": np.round(\
-                                     sk_metrics.accuracy_score(y, y_hat), 3),
-                             "f1": np.round(\
-                                     sk_metrics.f1_score(y,
-                                                         y_hat,
-                                                         average='micro'), 3)}
+            accuracy = np.round(sk_metrics.accuracy_score(y, y_hat), 3)
+            f1_score = np.round(sk_metrics.f1_score(y, y_hat,
+                                                    average='micro'), 3)
+            model_summary = {"accuracy": accuracy,
+                             "f1": f1_score}
         self.model_summary = model_summary
 
     def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
@@ -199,6 +199,7 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
         pd.DataFrame
 
             Pandas dataframe containing predicted `y_var` and `x_var`.
+
         """
         df_predict = pd.DataFrame(scale(pd.get_dummies(df_predict)))
         y_hat = self.model.predict(df_predict)
diff --git a/requirements.txt b/requirements.txt
index 45ef809..cf8b072 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-Cython==0.29.15
 pandas==1.1.3
+Cython==0.29.15
 numpy==1.19.5
 scikit_learn==1.0

From 6a5e16a966a1f16d8bfbc691cf3cf2d8a899bb40 Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sun, 26 Sep 2021 01:11:10 +0530
Subject: [PATCH 06/13] v0.4.0

---
 mllib/lib/knn.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 8d19d7c..a2f2bd2 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -17,10 +17,7 @@
 """
 
 # pylint: disable=invalid-name
-# pylint: disable=too-many-arguments
-# pylint: disable=too-few-public-methods
-# pylint: disable=R0902
-# pylint: disable=wrong-import-position
+# pylint: disable=R0902,R0903,R0913,C0413
 
 from typing import List, Dict, Any
 

From bfd1b133a7966b3bc73c1a9ff7bc3453e07c3dbc Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Sun, 26 Sep 2021 16:47:34 +0530
Subject: [PATCH 07/13] v0.4.0

changelog:
- cross validation metric changed to accuracy and rmse for classification and regression in knn.py file
- test for categorical variable added to test_knn.py
---
 data/input/iris.csv        | 302 ++++++++++++++++++-------------------
 logs/cov.out               |  18 +--
 logs/pip.out               |   2 +-
 logs/pylint/lib-knn-py.out |  12 +-
 mllib/lib/knn.py           |  18 ++-
 tests/test_knn.py          |  10 ++
 6 files changed, 190 insertions(+), 172 deletions(-)

diff --git a/data/input/iris.csv b/data/input/iris.csv
index d93a29c..2e5cab0 100644
--- a/data/input/iris.csv
+++ b/data/input/iris.csv
@@ -1,151 +1,151 @@
-x3,x4,x1,x2,y
-5.1,3.5,1.4,0.2,0
-4.9,3.0,1.4,0.2,0
-4.7,3.2,1.3,0.2,0
-4.6,3.1,1.5,0.2,0
-5.0,3.6,1.4,0.2,0
-5.4,3.9,1.7,0.4,0
-4.6,3.4,1.4,0.3,0
-5.0,3.4,1.5,0.2,0
-4.4,2.9,1.4,0.2,0
-4.9,3.1,1.5,0.1,0
-5.4,3.7,1.5,0.2,0
-4.8,3.4,1.6,0.2,0
-4.8,3.0,1.4,0.1,0
-4.3,3.0,1.1,0.1,0
-5.8,4.0,1.2,0.2,0
-5.7,4.4,1.5,0.4,0
-5.4,3.9,1.3,0.4,0
-5.1,3.5,1.4,0.3,0
-5.7,3.8,1.7,0.3,0
-5.1,3.8,1.5,0.3,0
-5.4,3.4,1.7,0.2,0
-5.1,3.7,1.5,0.4,0
-4.6,3.6,1.0,0.2,0
-5.1,3.3,1.7,0.5,0
-4.8,3.4,1.9,0.2,0
-5.0,3.0,1.6,0.2,0
-5.0,3.4,1.6,0.4,0
-5.2,3.5,1.5,0.2,0
-5.2,3.4,1.4,0.2,0
-4.7,3.2,1.6,0.2,0
-4.8,3.1,1.6,0.2,0
-5.4,3.4,1.5,0.4,0
-5.2,4.1,1.5,0.1,0
-5.5,4.2,1.4,0.2,0
-4.9,3.1,1.5,0.2,0
-5.0,3.2,1.2,0.2,0
-5.5,3.5,1.3,0.2,0
-4.9,3.6,1.4,0.1,0
-4.4,3.0,1.3,0.2,0
-5.1,3.4,1.5,0.2,0
-5.0,3.5,1.3,0.3,0
-4.5,2.3,1.3,0.3,0
-4.4,3.2,1.3,0.2,0
-5.0,3.5,1.6,0.6,0
-5.1,3.8,1.9,0.4,0
-4.8,3.0,1.4,0.3,0
-5.1,3.8,1.6,0.2,0
-4.6,3.2,1.4,0.2,0
-5.3,3.7,1.5,0.2,0
-5.0,3.3,1.4,0.2,0
-7.0,3.2,4.7,1.4,1
-6.4,3.2,4.5,1.5,1
-6.9,3.1,4.9,1.5,1
-5.5,2.3,4.0,1.3,1
-6.5,2.8,4.6,1.5,1
-5.7,2.8,4.5,1.3,1
-6.3,3.3,4.7,1.6,1
-4.9,2.4,3.3,1.0,1
-6.6,2.9,4.6,1.3,1
-5.2,2.7,3.9,1.4,1
-5.0,2.0,3.5,1.0,1
-5.9,3.0,4.2,1.5,1
-6.0,2.2,4.0,1.0,1
-6.1,2.9,4.7,1.4,1
-5.6,2.9,3.6,1.3,1
-6.7,3.1,4.4,1.4,1
-5.6,3.0,4.5,1.5,1
-5.8,2.7,4.1,1.0,1
-6.2,2.2,4.5,1.5,1
-5.6,2.5,3.9,1.1,1
-5.9,3.2,4.8,1.8,1
-6.1,2.8,4.0,1.3,1
-6.3,2.5,4.9,1.5,1
-6.1,2.8,4.7,1.2,1
-6.4,2.9,4.3,1.3,1
-6.6,3.0,4.4,1.4,1
-6.8,2.8,4.8,1.4,1
-6.7,3.0,5.0,1.7,1
-6.0,2.9,4.5,1.5,1
-5.7,2.6,3.5,1.0,1
-5.5,2.4,3.8,1.1,1
-5.5,2.4,3.7,1.0,1
-5.8,2.7,3.9,1.2,1
-6.0,2.7,5.1,1.6,1
-5.4,3.0,4.5,1.5,1
-6.0,3.4,4.5,1.6,1
-6.7,3.1,4.7,1.5,1
-6.3,2.3,4.4,1.3,1
-5.6,3.0,4.1,1.3,1
-5.5,2.5,4.0,1.3,1
-5.5,2.6,4.4,1.2,1
-6.1,3.0,4.6,1.4,1
-5.8,2.6,4.0,1.2,1
-5.0,2.3,3.3,1.0,1
-5.6,2.7,4.2,1.3,1
-5.7,3.0,4.2,1.2,1
-5.7,2.9,4.2,1.3,1
-6.2,2.9,4.3,1.3,1
-5.1,2.5,3.0,1.1,1
-5.7,2.8,4.1,1.3,1
-6.3,3.3,6.0,2.5,2
-5.8,2.7,5.1,1.9,2
-7.1,3.0,5.9,2.1,2
-6.3,2.9,5.6,1.8,2
-6.5,3.0,5.8,2.2,2
-7.6,3.0,6.6,2.1,2
-4.9,2.5,4.5,1.7,2
-7.3,2.9,6.3,1.8,2
-6.7,2.5,5.8,1.8,2
-7.2,3.6,6.1,2.5,2
-6.5,3.2,5.1,2.0,2
-6.4,2.7,5.3,1.9,2
-6.8,3.0,5.5,2.1,2
-5.7,2.5,5.0,2.0,2
-5.8,2.8,5.1,2.4,2
-6.4,3.2,5.3,2.3,2
-6.5,3.0,5.5,1.8,2
-7.7,3.8,6.7,2.2,2
-7.7,2.6,6.9,2.3,2
-6.0,2.2,5.0,1.5,2
-6.9,3.2,5.7,2.3,2
-5.6,2.8,4.9,2.0,2
-7.7,2.8,6.7,2.0,2
-6.3,2.7,4.9,1.8,2
-6.7,3.3,5.7,2.1,2
-7.2,3.2,6.0,1.8,2
-6.2,2.8,4.8,1.8,2
-6.1,3.0,4.9,1.8,2
-6.4,2.8,5.6,2.1,2
-7.2,3.0,5.8,1.6,2
-7.4,2.8,6.1,1.9,2
-7.9,3.8,6.4,2.0,2
-6.4,2.8,5.6,2.2,2
-6.3,2.8,5.1,1.5,2
-6.1,2.6,5.6,1.4,2
-7.7,3.0,6.1,2.3,2
-6.3,3.4,5.6,2.4,2
-6.4,3.1,5.5,1.8,2
-6.0,3.0,4.8,1.8,2
-6.9,3.1,5.4,2.1,2
-6.7,3.1,5.6,2.4,2
-6.9,3.1,5.1,2.3,2
-5.8,2.7,5.1,1.9,2
-6.8,3.2,5.9,2.3,2
-6.7,3.3,5.7,2.5,2
-6.7,3.0,5.2,2.3,2
-6.3,2.5,5.0,1.9,2
-6.5,3.0,5.2,2.0,2
-6.2,3.4,5.4,2.3,2
-5.9,3.0,5.1,1.8,2
+x3,x4,x1,x2,x5,y
+5.1,3.5,1.4,0.2,a,0
+4.9,3,1.4,0.2,a,0
+4.7,3.2,1.3,0.2,a,0
+4.6,3.1,1.5,0.2,a,0
+5,3.6,1.4,0.2,a,0
+5.4,3.9,1.7,0.4,a,0
+4.6,3.4,1.4,0.3,a,0
+5,3.4,1.5,0.2,a,0
+4.4,2.9,1.4,0.2,e,0
+4.9,3.1,1.5,0.1,e,0
+5.4,3.7,1.5,0.2,e,0
+4.8,3.4,1.6,0.2,e,0
+4.8,3,1.4,0.1,e,0
+4.3,3,1.1,0.1,e,0
+5.8,4,1.2,0.2,e,0
+5.7,4.4,1.5,0.4,e,0
+5.4,3.9,1.3,0.4,e,0
+5.1,3.5,1.4,0.3,e,0
+5.7,3.8,1.7,0.3,e,0
+5.1,3.8,1.5,0.3,s,0
+5.4,3.4,1.7,0.2,s,0
+5.1,3.7,1.5,0.4,s,0
+4.6,3.6,1,0.2,s,0
+5.1,3.3,1.7,0.5,s,0
+4.8,3.4,1.9,0.2,s,0
+5,3,1.6,0.2,s,0
+5,3.4,1.6,0.4,s,0
+5.2,3.5,1.5,0.2,s,0
+5.2,3.4,1.4,0.2,s,0
+4.7,3.2,1.6,0.2,s,0
+4.8,3.1,1.6,0.2,e,0
+5.4,3.4,1.5,0.4,s,0
+5.2,4.1,1.5,0.1,a,0
+5.5,4.2,1.4,0.2,s,0
+4.9,3.1,1.5,0.2,a,0
+5,3.2,1.2,0.2,s,0
+5.5,3.5,1.3,0.2,a,0
+4.9,3.6,1.4,0.1,e,0
+4.4,3,1.3,0.2,s,0
+5.1,3.4,1.5,0.2,a,0
+5,3.5,1.3,0.3,s,0
+4.5,2.3,1.3,0.3,e,0
+4.4,3.2,1.3,0.2,s,0
+5,3.5,1.6,0.6,s,0
+5.1,3.8,1.9,0.4,s,0
+4.8,3,1.4,0.3,s,0
+5.1,3.8,1.6,0.2,a,0
+4.6,3.2,1.4,0.2,a,0
+5.3,3.7,1.5,0.2,a,0
+5,3.3,1.4,0.2,a,0
+7,3.2,4.7,1.4,e,1
+6.4,3.2,4.5,1.5,e,1
+6.9,3.1,4.9,1.5,e,1
+5.5,2.3,4,1.3,e,1
+6.5,2.8,4.6,1.5,s,1
+5.7,2.8,4.5,1.3,e,1
+6.3,3.3,4.7,1.6,s,1
+4.9,2.4,3.3,1,a,1
+6.6,2.9,4.6,1.3,s,1
+5.2,2.7,3.9,1.4,e,1
+5,2,3.5,1,s,1
+5.9,3,4.2,1.5,a,1
+6,2.2,4,1,s,1
+6.1,2.9,4.7,1.4,e,1
+5.6,2.9,3.6,1.3,s,1
+6.7,3.1,4.4,1.4,a,1
+5.6,3,4.5,1.5,a,1
+5.8,2.7,4.1,1,s,1
+6.2,2.2,4.5,1.5,e,1
+5.6,2.5,3.9,1.1,a,1
+5.9,3.2,4.8,1.8,e,1
+6.1,2.8,4,1.3,e,1
+6.3,2.5,4.9,1.5,s,1
+6.1,2.8,4.7,1.2,e,1
+6.4,2.9,4.3,1.3,s,1
+6.6,3,4.4,1.4,a,1
+6.8,2.8,4.8,1.4,s,1
+6.7,3,5,1.7,e,1
+6,2.9,4.5,1.5,s,1
+5.7,2.6,3.5,1,a,1
+5.5,2.4,3.8,1.1,s,1
+5.5,2.4,3.7,1,e,1
+5.8,2.7,3.9,1.2,s,1
+6,2.7,5.1,1.6,e,1
+5.4,3,4.5,1.5,s,1
+6,3.4,4.5,1.6,a,1
+6.7,3.1,4.7,1.5,a,1
+6.3,2.3,4.4,1.3,s,1
+5.6,3,4.1,1.3,e,1
+5.5,2.5,4,1.3,a,1
+5.5,2.6,4.4,1.2,e,1
+6.1,3,4.6,1.4,e,1
+5.8,2.6,4,1.2,s,1
+5,2.3,3.3,1,e,1
+5.6,2.7,4.2,1.3,s,1
+5.7,3,4.2,1.2,a,1
+5.7,2.9,4.2,1.3,s,1
+6.2,2.9,4.3,1.3,e,1
+5.1,2.5,3,1.1,s,1
+5.7,2.8,4.1,1.3,s,1
+6.3,3.3,6,2.5,s,2
+5.8,2.7,5.1,1.9,s,2
+7.1,3,5.9,2.1,a,2
+6.3,2.9,5.6,1.8,a,2
+6.5,3,5.8,2.2,a,2
+7.6,3,6.6,2.1,a,2
+4.9,2.5,4.5,1.7,e,2
+7.3,2.9,6.3,1.8,e,2
+6.7,2.5,5.8,1.8,e,2
+7.2,3.6,6.1,2.5,e,2
+6.5,3.2,5.1,2,s,2
+6.4,2.7,5.3,1.9,e,2
+6.8,3,5.5,2.1,s,2
+5.7,2.5,5,2,s,2
+5.8,2.8,5.1,2.4,e,2
+6.4,3.2,5.3,2.3,s,2
+6.5,3,5.5,1.8,a,2
+7.7,3.8,6.7,2.2,a,2
+7.7,2.6,6.9,2.3,s,2
+6,2.2,5,1.5,e,2
+6.9,3.2,5.7,2.3,s,2
+5.6,2.8,4.9,2,a,2
+7.7,2.8,6.7,2,s,2
+6.3,2.7,4.9,1.8,a,2
+6.7,3.3,5.7,2.1,s,2
+7.2,3.2,6,1.8,a,2
+6.2,2.8,4.8,1.8,s,2
+6.1,3,4.9,1.8,a,2
+6.4,2.8,5.6,2.1,s,2
+7.2,3,5.8,1.6,e,2
+7.4,2.8,6.1,1.9,e,2
+7.9,3.8,6.4,2,e,2
+6.4,2.8,5.6,2.2,e,2
+6.3,2.8,5.1,1.5,e,2
+6.1,2.6,5.6,1.4,s,2
+7.7,3,6.1,2.3,s,2
+6.3,3.4,5.6,2.4,s,2
+6.4,3.1,5.5,1.8,s,2
+6,3,4.8,1.8,s,2
+6.9,3.1,5.4,2.1,a,2
+6.7,3.1,5.6,2.4,a,2
+6.9,3.1,5.1,2.3,a,2
+5.8,2.7,5.1,1.9,a,2
+6.8,3.2,5.9,2.3,a,2
+6.7,3.3,5.7,2.5,s,2
+6.7,3,5.2,2.3,s,2
+6.3,2.5,5,1.9,s,2
+6.5,3,5.2,2,e,2
+6.2,3.4,5.4,2.3,e,2
+5.9,3,5.1,1.8,e,2
diff --git a/logs/cov.out b/logs/cov.out
index cc34ded..a8c7525 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,9 @@
-Name                                                        Stmts   Miss  Cover   Missing
------------------------------------------------------------------------------------------
-/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           69      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         45      0   100%
------------------------------------------------------------------------------------------
-TOTAL                                                         231      0   100%
+Name                    Stmts   Miss  Cover   Missing
+-----------------------------------------------------
+mllib/__init__.py           7      0   100%
+mllib/lib/__init__.py       7      0   100%
+mllib/lib/cluster.py      103      0   100%
+mllib/lib/knn.py           77      0   100%
+mllib/lib/model.py         45      0   100%
+-----------------------------------------------------
+TOTAL                     239      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index f61bf91..03fb79a 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
+./bin/run_tests.sh: line 78: pipreqs: command not found
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
index 48851ad..840218f 100644
--- a/logs/pylint/lib-knn-py.out
+++ b/logs/pylint/lib-knn-py.out
@@ -1,9 +1,9 @@
 ************* Module mllib.lib.knn
-knn.py:175:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:176:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:177:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:178:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:174:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:175:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:176:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:177:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
 
--------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 9.70/10, +0.30)
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
 
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index a2f2bd2..521b5f3 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -30,7 +30,7 @@
 import numpy as np
 
 from sklearn import neighbors as sn
-from sklearn.preprocessing import scale
+from sklearn.preprocessing import MinMaxScaler
 from sklearn import metrics as sk_metrics
 
 from sklearn.model_selection import GridSearchCV
@@ -108,7 +108,7 @@ def __init__(self,
         """Initialize variables for module ``KNN``."""
         self.y_var = y_var
         self.x_var = x_var
-        self.df = df[[self.y_var] + self.x_var].reset_index(drop=True)
+        self.df = df.reset_index(drop=True)
         self.method = method
         self.model = None
         self.k_fold = k_fold
@@ -124,10 +124,12 @@ def __init__(self,
         self._compute_metrics()
 
     def _pre_process(self):
-        """Pre-process the data, one hot encoding and scaling."""
+        """Pre-process the data, one hot encoding and Normalizing."""
         df_ip_x = pd.get_dummies(self.df[self.x_var])
         self.x_var = list(df_ip_x.columns)
-        df_ip_x = pd.DataFrame(scale(df_ip_x))
+        self.norm = MinMaxScaler()
+        self.norm.fit(df_ip_x)
+        df_ip_x = pd.DataFrame(self.norm.transform(df_ip_x[self.x_var]))
         df_ip_x.columns = self.x_var
         self.df = self.df[[self.y_var]].join(df_ip_x)
 
@@ -198,8 +200,14 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
             Pandas dataframe containing predicted `y_var` and `x_var`.
 
         """
-        df_predict = pd.DataFrame(scale(pd.get_dummies(df_predict)))
+        df_predict = pd.get_dummies(df_predict)
+        df_predict_tmp = pd.DataFrame(columns=self.x_var)
+        df_predict = pd.concat([df_predict_tmp, df_predict])
+        df_predict = df_predict.fillna(0)
+        df_predict = pd.DataFrame(self.norm.transform(df_predict[self.x_var]))
+        df_predict.columns = self.x_var
         y_hat = self.model.predict(df_predict)
         df_predict = df_predict.copy()
         df_predict["y"] = y_hat
+        df_predict = df_predict[[self.y_var] + self.x_var]
         return df_predict
diff --git a/tests/test_knn.py b/tests/test_knn.py
index ae2f2ce..7020044 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -91,6 +91,16 @@ def test_knn_reg(self):
         acc = round(sk_metrics.mean_squared_error(y, y_hat), 2)
         self.assertLessEqual(acc, 0.1)
 
+    def test_knn_cat(self):
+        """KNN: Test for dummies in prediction dataset."""
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[["y", "x1", "x5"]]
+        df_train = df_ip.iloc[1:140]
+        df_predict = df_ip.iloc[145:150]
+        mod = KNN(df_train, "y", ["x1", "x5"], method="classify")
+        df_predict_columns = mod.predict(df_predict).columns.tolist()
+        df_predict_columns.pop(0)
+        self.assertGreaterEqual(mod.x_var, df_predict_columns)
 
 # =============================================================================
 # --- Main

From d61fc5d044c6330abf741af3865d77180b6867ad Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sun, 26 Sep 2021 18:22:40 +0530
Subject: [PATCH 08/13] v0.4.0

changelog:
- added ignore warnings decorator in unit tests
---
 mllib/__main__.py     |  2 +-
 mllib/lib/dev_knn.py  | 30 ++++++++++++++++++++++++++++++
 tests/test_cluster.py |  2 +-
 tests/test_knn.py     |  4 +++-
 tests/test_metrics.py |  2 +-
 tests/test_model.py   |  2 +-
 6 files changed, 37 insertions(+), 5 deletions(-)
 create mode 100644 mllib/lib/dev_knn.py

diff --git a/mllib/__main__.py b/mllib/__main__.py
index 7875e9e..4da6d36 100644
--- a/mllib/__main__.py
+++ b/mllib/__main__.py
@@ -95,7 +95,7 @@
     df_test = df_ip.drop(df_train.index)
     mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
     print("\nKNN\n")
-    y_hat = mod.predict(df_test[["x1", "x2"]]).tolist()
+    y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
     y = df_test["y"].values.tolist()
     accuracy = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
     print("Accuracy:", accuracy)
diff --git a/mllib/lib/dev_knn.py b/mllib/lib/dev_knn.py
new file mode 100644
index 0000000..501e83c
--- /dev/null
+++ b/mllib/lib/dev_knn.py
@@ -0,0 +1,30 @@
+import pandas as pd
+
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import classification_report
+
+path = "/media/ph33r/Data/Project/mllib/Git/data/input/"
+
+fn_ip = "iris.csv"
+
+df = pd.read_csv(path + fn_ip)
+
+y_var = ["y"]
+x_var = ["x1", "x2", "x3", "x4"]
+
+scaler = MinMaxScaler()
+scaler.fit(df[x_var])
+
+df_x_var = scaler.transform(df[x_var])
+df_y_var = df[y_var].values.ravel()
+
+classifier = KNeighborsClassifier(n_neighbors=3)
+classifier.fit(df_x_var, df_y_var)
+
+y_hat = classifier.predict(df_x_var)
+
+tmp = classification_report(y_hat, df_y_var, output_dict=True, zero_division=0)
+model_summary = tmp["weighted avg"]
+model_summary["accuracy"] = tmp["accuracy"]
+model_summary
diff --git a/tests/test_cluster.py b/tests/test_cluster.py
index efd74f4..f14e30b 100644
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@@ -44,7 +44,7 @@
 
 
 def ignore_warnings(test_func):
-    """Suppress deprecation warnings of pulp."""
+    """Suppress warnings."""
 
     def do_test(self, *args, **kwargs):
         with warnings.catch_warnings():
diff --git a/tests/test_knn.py b/tests/test_knn.py
index 7020044..e72d60c 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -48,7 +48,7 @@
 
 
 def ignore_warnings(test_func):
-    """Suppress deprecation warnings."""
+    """Suppress warnings."""
 
     def do_test(self, *args, **kwargs):
         with warnings.catch_warnings():
@@ -77,6 +77,7 @@ def test_knn_class(self):
         acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
         self.assertGreaterEqual(acc, 0.93)
 
+    @ignore_warnings
     def test_knn_reg(self):
         """KNN: Test for regression."""
         df_ip = pd.read_csv(path + "iris.csv")
@@ -102,6 +103,7 @@ def test_knn_cat(self):
         df_predict_columns.pop(0)
         self.assertGreaterEqual(mod.x_var, df_predict_columns)
 
+
 # =============================================================================
 # --- Main
 # =============================================================================
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index d9b7eac..948bec4 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -38,7 +38,7 @@
 
 
 def ignore_warnings(test_func):
-    """Suppress deprecation warnings of pulp."""
+    """Suppress warnings."""
 
     def do_test(self, *args, **kwargs):
         with warnings.catch_warnings():
diff --git a/tests/test_model.py b/tests/test_model.py
index a73901c..cc05ec4 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -45,7 +45,7 @@
 
 
 def ignore_warnings(test_func):
-    """Suppress deprecation warnings."""
+    """Suppress warnings."""
 
     def do_test(self, *args, **kwargs):
         with warnings.catch_warnings():

From 6e26a94594009a3fb1b0f17ca230f1c48ee37812 Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Sun, 26 Sep 2021 20:55:31 +0530
Subject: [PATCH 09/13] v0.4.0

changelog:
- code cleaning of knn.py, test_knn.py, model.py and test_model.py
---
 logs/cov.out         |  4 ++--
 mllib/lib/dev_knn.py | 30 ------------------------------
 mllib/lib/knn.py     | 30 ++++++++++++++----------------
 mllib/lib/model.py   | 12 ++++++------
 tests/test_knn.py    | 34 ++++++++++++++++++++--------------
 tests/test_model.py  |  4 ++--
 6 files changed, 44 insertions(+), 70 deletions(-)
 delete mode 100644 mllib/lib/dev_knn.py

diff --git a/logs/cov.out b/logs/cov.out
index a8c7525..1fbdc4b 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -3,7 +3,7 @@ Name                    Stmts   Miss  Cover   Missing
 mllib/__init__.py           7      0   100%
 mllib/lib/__init__.py       7      0   100%
 mllib/lib/cluster.py      103      0   100%
-mllib/lib/knn.py           77      0   100%
+mllib/lib/knn.py           74      0   100%
 mllib/lib/model.py         45      0   100%
 -----------------------------------------------------
-TOTAL                     239      0   100%
+TOTAL                     236      0   100%
diff --git a/mllib/lib/dev_knn.py b/mllib/lib/dev_knn.py
deleted file mode 100644
index 501e83c..0000000
--- a/mllib/lib/dev_knn.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import pandas as pd
-
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.metrics import classification_report
-
-path = "/media/ph33r/Data/Project/mllib/Git/data/input/"
-
-fn_ip = "iris.csv"
-
-df = pd.read_csv(path + fn_ip)
-
-y_var = ["y"]
-x_var = ["x1", "x2", "x3", "x4"]
-
-scaler = MinMaxScaler()
-scaler.fit(df[x_var])
-
-df_x_var = scaler.transform(df[x_var])
-df_y_var = df[y_var].values.ravel()
-
-classifier = KNeighborsClassifier(n_neighbors=3)
-classifier.fit(df_x_var, df_y_var)
-
-y_hat = classifier.predict(df_x_var)
-
-tmp = classification_report(y_hat, df_y_var, output_dict=True, zero_division=0)
-model_summary = tmp["weighted avg"]
-model_summary["accuracy"] = tmp["accuracy"]
-model_summary
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 521b5f3..9943baf 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -31,7 +31,7 @@
 
 from sklearn import neighbors as sn
 from sklearn.preprocessing import MinMaxScaler
-from sklearn import metrics as sk_metrics
+from sklearn.metrics import classification_report
 
 from sklearn.model_selection import GridSearchCV
 
@@ -62,11 +62,11 @@ class KNN():
 
     x_var : List[str]
 
-        Independant variables.
+        Independant variables
 
     method : str, optional
 
-        Can be either `classify` or `regression` (the default is classify)
+        Can be either `classify` or `regression` (the default is regression)
 
     k_fold : int, optional
 
@@ -93,7 +93,7 @@ class KNN():
 
     Example
     -------
-    >>> mod = KNN(df=df_ip, y_var=["y"], x_var=["x1", "x2", "x3"])
+    >>> mod = KNN(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
     >>> df_op = mod.predict(df_predict)
 
     """
@@ -102,7 +102,7 @@ def __init__(self,
                  df: pd.DataFrame,
                  y_var: str,
                  x_var: List[str],
-                 method: str = "classify",
+                 method: str = "regression",
                  k_fold: int = 5,
                  param: Dict = None):
         """Initialize variables for module ``KNN``."""
@@ -168,8 +168,8 @@ def _fit(self) -> Dict[str, Any]:
 
     def _compute_metrics(self):
         """Compute commonly used metrics to evaluate the model."""
-        y = self.df.iloc[:, 0].values.tolist()
-        y_hat = list(self.predict(self.df[self.x_var])["y"].values)
+        y = self.df.loc[:, self.y_var].values.tolist()
+        y_hat = list(self.predict(self.df[self.x_var])[self.y_var].values)
         if self.method == "regression":
             model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
                              "mae": np.round(metrics.mae(y, y_hat), 3),
@@ -177,11 +177,10 @@ def _compute_metrics(self):
                              "rmse": np.round(metrics.rmse(y, y_hat), 3)}
             model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
         if self.method == "classify":
-            accuracy = np.round(sk_metrics.accuracy_score(y, y_hat), 3)
-            f1_score = np.round(sk_metrics.f1_score(y, y_hat,
-                                                    average='micro'), 3)
-            model_summary = {"accuracy": accuracy,
-                             "f1": f1_score}
+            model_summary = classification_report(y_hat,
+                                                  y,
+                                                  output_dict=True,
+                                                  zero_division=0)
         self.model_summary = model_summary
 
     def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
@@ -200,6 +199,7 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
             Pandas dataframe containing predicted `y_var` and `x_var`.
 
         """
+        df_op = df_predict.copy(deep=True)
         df_predict = pd.get_dummies(df_predict)
         df_predict_tmp = pd.DataFrame(columns=self.x_var)
         df_predict = pd.concat([df_predict_tmp, df_predict])
@@ -207,7 +207,5 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
         df_predict = pd.DataFrame(self.norm.transform(df_predict[self.x_var]))
         df_predict.columns = self.x_var
         y_hat = self.model.predict(df_predict)
-        df_predict = df_predict.copy()
-        df_predict["y"] = y_hat
-        df_predict = df_predict[[self.y_var] + self.x_var]
-        return df_predict
+        df_op.insert(loc=0, column=self.y_var, value=y_hat)
+        return df_op
diff --git a/mllib/lib/model.py b/mllib/lib/model.py
index e8adf2a..c9057ed 100644
--- a/mllib/lib/model.py
+++ b/mllib/lib/model.py
@@ -56,7 +56,7 @@ class GLMNet():
 
         Pandas dataframe containing `y_var` and `x_var` variables.
 
-    y_var : List[str]
+    y_var : str
 
         Dependant variable.
 
@@ -108,12 +108,12 @@ class GLMNet():
 
     def __init__(self,
                  df: pd.DataFrame,
-                 y_var: List[str],
+                 y_var: str,
                  x_var: List[str],
                  strata: str = None,
                  param: Dict = None):
         """Initialize variables for module ``GLMNet``."""
-        self.df = df[y_var + x_var]
+        self.df = df[[y_var] + x_var]
         self.y_var = y_var
         self.x_var = x_var
         self.strata = strata
@@ -137,7 +137,7 @@ def _fit(self) -> None:
         """Fit the best GLMNet model."""
         train_x, test_x,\
             train_y, test_y = split(self.df[self.x_var],
-                                    self.df[self.y_var],
+                                    self.df[[self.y_var]],
                                     test_size=self.param["test_perc"],
                                     random_state=self.param["seed"],
                                     stratify=self.strata)
@@ -161,7 +161,7 @@ def _fit(self) -> None:
 
     def _compute_metrics(self):
         """Compute commonly used metrics to evaluate the model."""
-        y = self.df[self.y_var].iloc[:, 0].values.tolist()
+        y = self.df[[self.y_var]].iloc[:, 0].values.tolist()
         y_hat = list(self.predict(self.df[self.x_var])["y"].values)
         model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
                          "mae": np.round(metrics.mae(y, y_hat), 3),
@@ -188,5 +188,5 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
         """
         y_hat = self.model.predict(df_predict)
         df_predict = df_predict.copy()
-        df_predict["y"] = y_hat
+        df_predict.insert(loc=0, column=self.y_var, value=y_hat)
         return df_predict
diff --git a/tests/test_knn.py b/tests/test_knn.py
index e72d60c..b244270 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -65,40 +65,46 @@ def setUp(self):
 
     def test_knn_class(self):
         """KNN: Test for classification."""
+        x_var = ["x1", "x2"]
+        y_var = "y"
         df_ip = pd.read_csv(path + "iris.csv")
-        df_ip = df_ip[["y", "x1", "x2"]]
+        df_ip = df_ip[[y_var] + x_var]
         df_train, df_test = split(df_ip,
-                                  stratify=df_ip["y"],
+                                  stratify=df_ip[y_var],
                                   test_size=0.1,
                                   random_state=42)
-        mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
-        y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
-        y = df_test["y"].values.tolist()
+        mod = KNN(df_train, y_var, x_var, method="classify")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
         acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
         self.assertGreaterEqual(acc, 0.93)
 
     @ignore_warnings
     def test_knn_reg(self):
         """KNN: Test for regression."""
+        x_var = ["x1", "x2"]
+        y_var = "y"
         df_ip = pd.read_csv(path + "iris.csv")
-        df_ip = df_ip[["y", "x1", "x2"]]
+        df_ip = df_ip[[y_var] + x_var]
         df_train, df_test = split(df_ip,
-                                  stratify=df_ip["y"],
+                                  stratify=df_ip[y_var],
                                   test_size=0.1,
                                   random_state=42)
-        mod = KNN(df_train, "y", ["x1", "x2"], method="regression")
-        y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
-        y = df_test["y"].values.tolist()
+        mod = KNN(df_train, y_var, x_var, method="regression")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
         acc = round(sk_metrics.mean_squared_error(y, y_hat), 2)
         self.assertLessEqual(acc, 0.1)
 
     def test_knn_cat(self):
-        """KNN: Test for dummies in prediction dataset."""
+        """KNN: Test for one-hot encoding in prediction."""
+        x_var = ["x1", "x2"]
+        y_var = "y"
         df_ip = pd.read_csv(path + "iris.csv")
-        df_ip = df_ip[["y", "x1", "x5"]]
+        df_ip = df_ip[[y_var] + x_var]
         df_train = df_ip.iloc[1:140]
-        df_predict = df_ip.iloc[145:150]
-        mod = KNN(df_train, "y", ["x1", "x5"], method="classify")
+        df_predict = df_ip.iloc[145:150, 1:]
+        mod = KNN(df_train, y_var, x_var, method="classify")
         df_predict_columns = mod.predict(df_predict).columns.tolist()
         df_predict_columns.pop(0)
         self.assertGreaterEqual(mod.x_var, df_predict_columns)
diff --git a/tests/test_model.py b/tests/test_model.py
index cc05ec4..a4c7ac1 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -64,7 +64,7 @@ def test_known_equation(self):
         """GLMNet: Test a known equation."""
         df_ip = pd.read_csv(path + "test_glmnet.csv")
         mod = GLMNet(df=df_ip,
-                     y_var=["y"],
+                     y_var="y",
                      x_var=["x1", "x2", "x3"])
         op = mod.opt
         self.assertEqual(np.round(op.get('intercept'), 0), 100.0)
@@ -76,7 +76,7 @@ def test_predict_target_variable(self):
         """GLMNet: Test to predict a target variable."""
         df_ip = pd.read_csv(path + "test_glmnet.csv")
         mod = GLMNet(df=df_ip,
-                     y_var=["y"],
+                     y_var="y",
                      x_var=["x1", "x2", "x3"])
         df_predict = pd.DataFrame({"x1": [10, 20],
                                    "x2": [5, 10],

From d66749e1efdcd67df3ee5cb2956778f3c5c8dd5a Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Sun, 26 Sep 2021 21:42:36 +0530
Subject: [PATCH 10/13] v0.4.0

---
 logs/cov.out       | 18 +++++++++---------
 logs/pip.out       |  2 +-
 mllib/__main__.py  | 10 ++++------
 mllib/lib/knn.py   | 18 ++++++++++++++----
 mllib/lib/model.py |  1 -
 requirements.txt   |  2 +-
 6 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/logs/cov.out b/logs/cov.out
index 1fbdc4b..ecff896 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,9 @@
-Name                    Stmts   Miss  Cover   Missing
------------------------------------------------------
-mllib/__init__.py           7      0   100%
-mllib/lib/__init__.py       7      0   100%
-mllib/lib/cluster.py      103      0   100%
-mllib/lib/knn.py           74      0   100%
-mllib/lib/model.py         45      0   100%
------------------------------------------------------
-TOTAL                     236      0   100%
+Name                                                        Stmts   Miss  Cover   Missing
+-----------------------------------------------------------------------------------------
+/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           77      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         44      0   100%
+-----------------------------------------------------------------------------------------
+TOTAL                                                         238      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index 03fb79a..f61bf91 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-./bin/run_tests.sh: line 78: pipreqs: command not found
+INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
diff --git a/mllib/__main__.py b/mllib/__main__.py
index 4da6d36..4077d8e 100644
--- a/mllib/__main__.py
+++ b/mllib/__main__.py
@@ -80,8 +80,8 @@
     start_t = time.time_ns()
     df_ip = pd.read_csv(path + "input/test_glmnet.csv")
     glm_mod = GLMNet(df=df_ip,
-                     y_var=["y"],
-                     x_var=["x1", "x3"])
+                     y_var="y",
+                     x_var=["x1", "x2"])
     print("\nGLMNet\n")
     for k, v in glm_mod.model_summary.items():
         print(k, str(v).rjust(69 - len(k)))
@@ -95,10 +95,8 @@
     df_test = df_ip.drop(df_train.index)
     mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
     print("\nKNN\n")
-    y_hat = mod.predict(df_test[["x1", "x2"]])["y"].tolist()
-    y = df_test["y"].values.tolist()
-    accuracy = round(len([i for i, j in zip(y, y_hat) if i == j]) / len(y), 2)
-    print("Accuracy:", accuracy)
+    for k, v in mod.model_summary.items():
+        print(k, str(v).rjust(69 - len(k)))
     print(elapsed_time("Time", start_t),
           sep="\n")
     # --- EOF
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 9943baf..284b718 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -87,6 +87,12 @@ class KNN():
 
         Final optimal model.
 
+    model_summary : Dict
+
+        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
+        MAPE for regression and Accuracy, Precision, Recall, F1 score for
+        classification.
+
     Methods
     -------
     predict
@@ -177,10 +183,14 @@ def _compute_metrics(self):
                              "rmse": np.round(metrics.rmse(y, y_hat), 3)}
             model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
         if self.method == "classify":
-            model_summary = classification_report(y_hat,
-                                                  y,
-                                                  output_dict=True,
-                                                  zero_division=0)
+            class_report = classification_report(y_hat,
+                                                 y,
+                                                 output_dict=True,
+                                                 zero_division=0)
+            model_summary = class_report["weighted avg"]
+            model_summary["accuracy"] = class_report["accuracy"]
+            model_summary = {key: round(model_summary[key], 3)
+                             for key in model_summary}
         self.model_summary = model_summary
 
     def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
diff --git a/mllib/lib/model.py b/mllib/lib/model.py
index c9057ed..efa6afc 100644
--- a/mllib/lib/model.py
+++ b/mllib/lib/model.py
@@ -187,6 +187,5 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
 
         """
         y_hat = self.model.predict(df_predict)
-        df_predict = df_predict.copy()
         df_predict.insert(loc=0, column=self.y_var, value=y_hat)
         return df_predict
diff --git a/requirements.txt b/requirements.txt
index cf8b072..66d1dec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-pandas==1.1.3
 Cython==0.29.15
 numpy==1.19.5
+pandas==1.1.3
 scikit_learn==1.0

From d17f82aa0d137f630732f6b55d9d1ba461c1984e Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Mon, 27 Sep 2021 10:59:23 +0530
Subject: [PATCH 11/13] v0.4.0

changelog:
- knn classification's default scorer changed to f1_weighted
- removed some redundant code
---
 logs/cov.out               |  4 ++--
 logs/pylint/lib-knn-py.out |  8 ++++----
 mllib/__main__.py          |  5 +----
 mllib/lib/knn.py           | 26 +++++++++-----------------
 requirements.txt           |  2 +-
 5 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/logs/cov.out b/logs/cov.out
index ecff896..70db958 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -3,7 +3,7 @@ Name                                                        Stmts   Miss  Cover
 /media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           77      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           70      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         44      0   100%
 -----------------------------------------------------------------------------------------
-TOTAL                                                         238      0   100%
+TOTAL                                                         231      0   100%
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
index 840218f..28f2b90 100644
--- a/logs/pylint/lib-knn-py.out
+++ b/logs/pylint/lib-knn-py.out
@@ -1,8 +1,8 @@
 ************* Module mllib.lib.knn
-knn.py:174:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:175:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:176:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:177:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:172:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:173:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:174:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:175:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
 
 --------------------------------------------------------------------
 Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
diff --git a/mllib/__main__.py b/mllib/__main__.py
index 4077d8e..7cbcca8 100644
--- a/mllib/__main__.py
+++ b/mllib/__main__.py
@@ -90,10 +90,7 @@
     # --- KNN
     start_t = time.time_ns()
     df_ip = pd.read_csv(path + "input/iris.csv")
-    df_ip = df_ip[["y", "x1", "x2"]]
-    df_train = df_ip.sample(frac=0.8, random_state=42)
-    df_test = df_ip.drop(df_train.index)
-    mod = KNN(df_train, "y", ["x1", "x2"], method="classify")
+    mod = KNN(df_ip, "y", ["x1", "x2", "x3", "x4"], method="classify")
     print("\nKNN\n")
     for k, v in mod.model_summary.items():
         print(k, str(v).rjust(69 - len(k)))
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 284b718..4169387 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -144,8 +144,10 @@ def _fit(self) -> Dict[str, Any]:
         if self.method == "classify":
             gs = GridSearchCV(estimator=sn.KNeighborsClassifier(),
                               param_grid=self.param,
-                              scoring='accuracy',
+                              scoring='f1_weighted',
                               verbose=0,
+                              refit=True,
+                              return_train_score=True,
                               cv=self.k_fold,
                               n_jobs=-1)
         elif self.method == "regression":
@@ -153,29 +155,19 @@ def _fit(self) -> Dict[str, Any]:
                               param_grid=self.param,
                               scoring='neg_root_mean_squared_error',
                               verbose=0,
+                              refit=True,
+                              return_train_score=True,
                               cv=self.k_fold,
                               n_jobs=-1)
         gs_op = gs.fit(self.df[self.x_var],
                        self.df[self.y_var])
-        opt_k = gs_op.best_params_.get("n_neighbors")
-        weight = gs_op.best_params_.get("weights")
-        metric = gs_op.best_params_.get("metric")
-        if self.method == "classify":
-            model = sn.KNeighborsClassifier(n_neighbors=opt_k,
-                                            weights=weight,
-                                            metric=metric)
-        elif self.method == "regression":
-            model = sn.KNeighborsRegressor(n_neighbors=opt_k,
-                                           weights=weight,
-                                           metric=metric)
-        self.model = model.fit(self.df[self.x_var],
-                               self.df[self.y_var])
+        self.model = gs_op
         return gs_op.best_params_
 
     def _compute_metrics(self):
         """Compute commonly used metrics to evaluate the model."""
         y = self.df.loc[:, self.y_var].values.tolist()
-        y_hat = list(self.predict(self.df[self.x_var])[self.y_var].values)
+        y_hat = list(self.model.predict(self.df[self.x_var]))
         if self.method == "regression":
             model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
                              "mae": np.round(metrics.mae(y, y_hat), 3),
@@ -183,8 +175,8 @@ def _compute_metrics(self):
                              "rmse": np.round(metrics.rmse(y, y_hat), 3)}
             model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
         if self.method == "classify":
-            class_report = classification_report(y_hat,
-                                                 y,
+            class_report = classification_report(y,
+                                                 y_hat,
                                                  output_dict=True,
                                                  zero_division=0)
             model_summary = class_report["weighted avg"]
diff --git a/requirements.txt b/requirements.txt
index 66d1dec..ef333fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-Cython==0.29.15
 numpy==1.19.5
 pandas==1.1.3
+Cython==0.29.15
 scikit_learn==1.0

From d0a5e1e226b3b843a4da8735695b0a93e9835958 Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Mon, 27 Sep 2021 11:14:46 +0530
Subject: [PATCH 12/13] v0.4.0

---
 mllib/lib/knn.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 4169387..9e98369 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -87,6 +87,10 @@ class KNN():
 
         Final optimal model.
 
+    best_params_ : Dict
+
+        Best parameters amongst the given parameters.
+
     model_summary : Dict
 
         Model summary containing key metrics like R-squared, RMSE, MSE, MAE,

From 9c67779f9996fec5d5864f74cf57b6387818411f Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Mon, 27 Sep 2021 13:36:36 +0530
Subject: [PATCH 13/13] v0.4.0

changelog:
- regenerated log files from a local test run; lowercased "normalizing" in a knn.py docstring
---
 logs/cov.out               | 18 +++++++++---------
 logs/pip.out               |  2 +-
 logs/pylint/lib-knn-py.out |  8 ++++----
 mllib/lib/knn.py           |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/logs/cov.out b/logs/cov.out
index 70db958..f0a3c4b 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,9 @@
-Name                                                        Stmts   Miss  Cover   Missing
------------------------------------------------------------------------------------------
-/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           70      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         44      0   100%
------------------------------------------------------------------------------------------
-TOTAL                                                         231      0   100%
+Name                    Stmts   Miss  Cover   Missing
+-----------------------------------------------------
+mllib/__init__.py           7      0   100%
+mllib/lib/__init__.py       7      0   100%
+mllib/lib/cluster.py      103      0   100%
+mllib/lib/knn.py           70      0   100%
+mllib/lib/model.py         44      0   100%
+-----------------------------------------------------
+TOTAL                     231      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index f61bf91..03fb79a 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
+./bin/run_tests.sh: line 78: pipreqs: command not found
diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out
index 28f2b90..ccf9413 100644
--- a/logs/pylint/lib-knn-py.out
+++ b/logs/pylint/lib-knn-py.out
@@ -1,8 +1,8 @@
 ************* Module mllib.lib.knn
-knn.py:172:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:173:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:174:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-knn.py:175:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:176:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:177:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:178:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+knn.py:179:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
 
 --------------------------------------------------------------------
 Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 9e98369..5b14b05 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -134,7 +134,7 @@ def __init__(self,
         self._compute_metrics()
 
     def _pre_process(self):
-        """Pre-process the data, one hot encoding and Normalizing."""
+        """Pre-process the data, one hot encoding and normalizing."""
         df_ip_x = pd.get_dummies(self.df[self.x_var])
         self.x_var = list(df_ip_x.columns)
         self.norm = MinMaxScaler()