From d7c73bbd919200393f4f5080e75481370745f1a8 Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Mon, 27 Sep 2021 16:54:53 +0530
Subject: [PATCH 1/4] v0.4.0

changelog:
- added RandomForest module with associated tests
---
 logs/cov.out                                |  19 +-
 logs/pip.out                                |   2 +-
 logs/pylint/lib-random_forest-py.out        |   9 +
 logs/pylint/tests-test_random_forest-py.out |   4 +
 mllib/lib/knn.py                            |   6 +-
 mllib/lib/random_forest.py                  | 187 ++++++++++++++++++++
 requirements.txt                            |   2 +-
 tests/test_knn.py                           |   4 +-
 tests/test_random_forest.py                 | 105 +++++++++++
 9 files changed, 322 insertions(+), 16 deletions(-)
 create mode 100644 logs/pylint/lib-random_forest-py.out
 create mode 100644 logs/pylint/tests-test_random_forest-py.out
 create mode 100644 mllib/lib/random_forest.py
 create mode 100644 tests/test_random_forest.py

diff --git a/logs/cov.out b/logs/cov.out
index f0a3c4b..4dbc976 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,9 +1,10 @@
-Name                    Stmts   Miss  Cover   Missing
------------------------------------------------------
-mllib/__init__.py           7      0   100%
-mllib/lib/__init__.py       7      0   100%
-mllib/lib/cluster.py      103      0   100%
-mllib/lib/knn.py           70      0   100%
-mllib/lib/model.py         44      0   100%
------------------------------------------------------
-TOTAL                     231      0   100%
+Name                                                             Stmts   Miss  Cover   Missing
+----------------------------------------------------------------------------------------------
+/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py                7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py            7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py           103      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py                70      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py              44      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/random_forest.py      61      0   100%
+----------------------------------------------------------------------------------------------
+TOTAL                                                              292      0   100%
diff --git a/logs/pip.out b/logs/pip.out
index 03fb79a..f61bf91 100644
--- a/logs/pip.out
+++ b/logs/pip.out
@@ -1 +1 @@
-./bin/run_tests.sh: line 78: pipreqs: command not found
+INFO: Successfully saved requirements file in /media/ph33r/Data/Project/mllib/Git/requirements.txt
diff --git a/logs/pylint/lib-random_forest-py.out b/logs/pylint/lib-random_forest-py.out
new file mode 100644
index 0000000..ade91ae
--- /dev/null
+++ b/logs/pylint/lib-random_forest-py.out
@@ -0,0 +1,9 @@
+************* Module mllib.lib.random_forest
+random_forest.py:146:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+random_forest.py:147:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+random_forest.py:148:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+random_forest.py:149:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+
diff --git a/logs/pylint/tests-test_random_forest-py.out b/logs/pylint/tests-test_random_forest-py.out
new file mode 100644
index 0000000..d7495ee
--- /dev/null
+++ b/logs/pylint/tests-test_random_forest-py.out
@@ -0,0 +1,4 @@
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index 5b14b05..ba5d08d 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -189,7 +189,7 @@ def _compute_metrics(self):
                              for key in model_summary}
         self.model_summary = model_summary
 
-    def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
+    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
         """Predict y_var/target variable.
 
         Parameters
@@ -205,8 +205,8 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
             Pandas dataframe containing predicted `y_var` and `x_var`.
 
         """
-        df_op = df_predict.copy(deep=True)
-        df_predict = pd.get_dummies(df_predict)
+        df_op = x_predict.copy(deep=True)
+        df_predict = pd.get_dummies(x_predict)
         df_predict_tmp = pd.DataFrame(columns=self.x_var)
         df_predict = pd.concat([df_predict_tmp, df_predict])
         df_predict = df_predict.fillna(0)
diff --git a/mllib/lib/random_forest.py b/mllib/lib/random_forest.py
new file mode 100644
index 0000000..33685ab
--- /dev/null
+++ b/mllib/lib/random_forest.py
@@ -0,0 +1,187 @@
+"""
+Random Forest module.
+
+**Available routines:**
+
+- class ``RandomForest``: Builds Random Forest model using cross validation.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 27, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=R0902,R0903,R0913,C0413
+
+from typing import List, Dict, Any
+
+import re
+import sys
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+import numpy as np
+import sklearn.ensemble as rf
+
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import classification_report
+
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
+sys.path.insert(0, path)
+
+import metrics  # noqa: F841
+
+
+class RandomForest():
+    """Random forest module.
+
+    Objective:
+        - Build
+          `Random forest <https://en.wikipedia.org/wiki/Random_forest>`_
+          model and determine optimal hyperparameters via grid search
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+
+        Pandas dataframe containing the `y_var` and `x_var`
+
+    y_var : str
+
+        Dependent variable
+
+    x_var : List[str]
+
+        Independent variables
+
+    method : str, optional
+
+        Can be either `classify` or `regression` (the default is regression)
+
+    k_fold : int, optional
+
+        Number of cross validations folds (the default is 5)
+
+    param : dict, optional
+
+        Random forest parameters (the default is None).
+        In case of None, the parameters will default to::
+
+            bootstrap: [True]
+            max_depth: list(range(1, len(x_var)))
+            n_estimators: [1000]
+            max_features: ["sqrt"] (classify), [len(x_var) // 3] (regression)
+            min_samples_leaf: [2] (classify), [5] (regression)
+
+    Returns
+    -------
+    model : object
+
+        Final optimal model.
+
+    best_params_ : Dict
+
+        Best parameters amongst the given parameters.
+
+    model_summary : Dict
+
+        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
+        MAPE for regression and Accuracy, Precision, Recall, F1 score for
+        classification.
+
+    Methods
+    -------
+    predict
+
+    Example
+    -------
+    >>> mod = RandomForest(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
+    >>> df_op = mod.predict(df_predict)
+
+    """
+
+    def __init__(self,
+                 df: pd.DataFrame,
+                 y_var: str,
+                 x_var: List[str],
+                 method: str = "regression",
+                 k_fold: int = 5,
+                 param: Dict = None):
+        """Initialize variables for module ``RandomForest``."""
+        self.y_var = y_var
+        self.x_var = x_var
+        self.df = df.reset_index(drop=True)
+        self.method = method
+        self.model = None
+        self.k_fold = k_fold
+        self.seed = 1
+        if param is None:
+            param = {"bootstrap": [True],
+                     "max_depth": list(range(1, len(x_var))),
+                     "n_estimators": [1000]}
+            if method == "classify":
+                param["max_features"] = ["sqrt"]
+                param["min_samples_leaf"] = [2]
+            elif method == "regression":
+                param["max_features"] = [int(len(x_var) / 3)]
+                param["min_samples_leaf"] = [5]
+        self.param = param
+        self.best_params_ = self._fit()
+        self.model_summary = None
+        self._compute_metrics()
+
+    def _compute_metrics(self):
+        """Compute commonly used metrics to evaluate the model."""
+        y = self.df.loc[:, self.y_var].values.tolist()
+        y_hat = list(self.model.predict(self.df[self.x_var]))
+        if self.method == "regression":
+            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
+                             "mae": np.round(metrics.mae(y, y_hat), 3),
+                             "mape": np.round(metrics.mape(y, y_hat), 3),
+                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
+            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
+        if self.method == "classify":
+            class_report = classification_report(y,
+                                                 y_hat,
+                                                 output_dict=True,
+                                                 zero_division=0)
+            model_summary = class_report["weighted avg"]
+            model_summary["accuracy"] = class_report["accuracy"]
+            model_summary = {key: round(model_summary[key], 3)
+                             for key in model_summary}
+        self.model_summary = model_summary
+
+    def _fit(self) -> Dict[str, Any]:
+        """Fit RandomForest model."""
+        if self.method == "classify":
+            tmp_model = rf.RandomForestClassifier(oob_score=True,
+                                                  random_state=self.seed)
+        elif self.method == "regression":
+            tmp_model = rf.RandomForestRegressor(oob_score=True,
+                                                 random_state=self.seed)
+        gs = GridSearchCV(estimator=tmp_model,
+                          param_grid=self.param,
+                          n_jobs=-1,
+                          verbose=0,
+                          refit=True,
+                          return_train_score=True,
+                          cv=self.k_fold)
+        gs_op = gs.fit(self.df[self.x_var],
+                       self.df[self.y_var])
+        self.model = gs_op
+        return gs_op.best_params_
+
+    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
+        """Predict values."""
+        df_op = x_predict.copy(deep=True)
+        y_hat = self.model.predict(x_predict)
+        df_op.insert(loc=0, column=self.y_var, value=y_hat)
+        return df_op
diff --git a/requirements.txt b/requirements.txt
index ef333fe..86af4d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy==1.19.5
 pandas==1.1.3
+numpy==1.19.5
 Cython==0.29.15
 scikit_learn==1.0
diff --git a/tests/test_knn.py b/tests/test_knn.py
index b244270..a4c0547 100644
--- a/tests/test_knn.py
+++ b/tests/test_knn.py
@@ -93,8 +93,8 @@ def test_knn_reg(self):
         mod = KNN(df_train, y_var, x_var, method="regression")
         y_hat = mod.predict(df_test[x_var])[y_var].tolist()
         y = df_test[y_var].values.tolist()
-        acc = round(sk_metrics.mean_squared_error(y, y_hat), 2)
-        self.assertLessEqual(acc, 0.1)
+        mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
+        self.assertLessEqual(mse, 0.1)
 
     def test_knn_cat(self):
         """KNN: Test for one-hot encoding in prediction."""
diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
new file mode 100644
index 0000000..f0127f3
--- /dev/null
+++ b/tests/test_random_forest.py
@@ -0,0 +1,105 @@
+"""
+Test suite module for ``random_forest``.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 27, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=wrong-import-position
+
+import unittest
+import warnings
+import re
+import sys
+
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+
+from sklearn.model_selection import train_test_split as split
+from sklearn import metrics as sk_metrics
+
+# Set base path
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+)(\/tests.*)", "\\1", path)
+
+sys.path.insert(0, path)
+
+from mllib.lib.random_forest import RandomForest  # noqa: F841
+
+# =============================================================================
+# --- DO NOT CHANGE ANYTHING FROM HERE
+# =============================================================================
+
+path = path + "/data/input/"
+
+# =============================================================================
+# --- User defined functions
+# =============================================================================
+
+
+def ignore_warnings(test_func):
+    """Suppress warnings."""
+
+    def do_test(self, *args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            test_func(self, *args, **kwargs)
+    return do_test
+
+
+class Test_Knn(unittest.TestCase):
+    """Test suite for module ``KNN``."""
+
+    def setUp(self):
+        """Set up for module ``KNN``."""
+
+    def test_rf_class(self):
+        """RandomForest: Test for classification."""
+        x_var = ["x1", "x2", "x3", "x4"]
+        y_var = "y"
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[[y_var] + x_var]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip[y_var],
+                                  test_size=0.2,
+                                  random_state=42)
+        mod = RandomForest(df_train, y_var, x_var, method="classify")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
+        acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
+        self.assertGreaterEqual(acc, 0.93)
+
+    @ignore_warnings
+    def test_knn_reg(self):
+        """RandomForest: Test for regression."""
+        x_var = ["x1", "x2", "x3", "x4"]
+        y_var = "y"
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[[y_var] + x_var]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip[y_var],
+                                  test_size=0.2,
+                                  random_state=42)
+        mod = RandomForest(df_train, y_var, x_var, method="regression")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
+        mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
+        self.assertLessEqual(mse, 0.1)
+
+
+# =============================================================================
+# --- Main
+# =============================================================================
+
+if __name__ == '__main__':
+    unittest.main()

From 38787dfc6c1bf273d3bb3f4e99931b90dbc6dbac Mon Sep 17 00:00:00 2001
From: MadhuTangudu <madhu.tangudu@gmail.com>
Date: Mon, 27 Sep 2021 19:48:02 +0530
Subject: [PATCH 2/4] v0.4.0

---
 mllib/lib/xgboost.py  | 199 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_xgboost.py | 105 ++++++++++++++++++++++
 2 files changed, 304 insertions(+)
 create mode 100644 mllib/lib/xgboost.py
 create mode 100644 tests/test_xgboost.py

diff --git a/mllib/lib/xgboost.py b/mllib/lib/xgboost.py
new file mode 100644
index 0000000..9d82dff
--- /dev/null
+++ b/mllib/lib/xgboost.py
@@ -0,0 +1,199 @@
+"""
+Random Forest module.
+
+**Available routines:**
+
+- class ``XGBoost``: Builds XGBoost model using cross validation.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 27, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=R0902,R0903,R0913,C0413
+
+from typing import List, Dict, Any
+
+import re
+import sys
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+import numpy as np
+import xgboost as xgb
+
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import classification_report
+
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
+sys.path.insert(0, path)
+
+import metrics  # noqa: F841
+
+
+class XGBoost():
+    """Random forest module.
+
+    Objective:
+        - Build
+          `XGBoost < https://en.wikipedia.org/wiki/XGBoost >'
+          model and determine optimal k
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+
+        Pandas dataframe containing the `y_var` and `x_var`
+
+    y_var : str
+
+        Dependent variable
+
+    x_var : List[str]
+
+        Independent variables
+
+    method : str, optional
+
+        Can be either `classify` or `regression` (the default is regression)
+
+    k_fold : int, optional
+
+        Number of cross validations folds (the default is 5)
+
+    param : dict, optional
+
+        XGBoost parameters (the default is None).
+        In case of None, the parameters will default to::
+
+            n_estimators: [1000]
+            learning_rate: [i/1000 for i in range(2, 11)]
+            subsample: [i/10 for i in range(5, 10)]
+            colsample_bytree: [i/10 for i in range(1, 11)]
+            min_child_weight: list(range(1, 11))
+            max_depth: [1, len(x_var)]
+            gamma: list(range(1, len(x_var)))
+            objective: ["reg:squarederror"]
+
+    Returns
+    -------
+    model : object
+
+        Final optimal model.
+
+    best_params_ : Dict
+
+        Best parameters amongst the given parameters.
+
+    model_summary : Dict
+
+        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
+        MAPE for regression and Accuracy, Precision, Recall, F1 score for
+        classification.
+
+    Methods
+    -------
+    predict
+
+    Example
+    -------
+    >>> mod = XGBoost(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
+    >>> df_op = mod.predict(df_predict)
+
+    """
+
+    def __init__(self,
+                 df: pd.DataFrame,
+                 y_var: str,
+                 x_var: List[str],
+                 method: str = "regression",
+                 k_fold: int = 5,
+                 param: Dict = None):
+        """Initialize variables for module ``XGBoost``."""
+        self.y_var = y_var
+        self.x_var = x_var
+        self.df = df.reset_index(drop=True)
+        self.method = method
+        self.model = None
+        self.k_fold = k_fold
+        self.seed = 1
+        if param is None:
+            param = {"n_estimators": [1000],
+                           "learning_rate": [i/1000 for i in range(2, 11)],
+                           "subsample": [i/10 for i in range(5, 10)],
+                           "colsample_bytree": [i/10 for i in range(1, 11)],
+                           "min_child_weight": list(range(1, 11)),
+                           "max_depth": list(range(1, len(x_var))),
+                           "gamma": list(range(0, 21)),
+                           }
+            if method == "classify":
+                param["objective"] = ["binary:logistic"]
+            elif method == "regression":
+                param["objective"] = ["reg:squarederror"]
+        self.param = param
+        self.best_params_ = self._fit()
+        self.model_summary = None
+        self._compute_metrics()
+
+    def _compute_metrics(self):
+        """Compute commonly used metrics to evaluate the model."""
+        y = self.df.loc[:, self.y_var].values.tolist()
+        y_hat = list(self.model.predict(self.df[self.x_var]))
+        if self.method == "regression":
+            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
+                             "mae": np.round(metrics.mae(y, y_hat), 3),
+                             "mape": np.round(metrics.mape(y, y_hat), 3),
+                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
+            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
+        if self.method == "classify":
+            class_report = classification_report(y,
+                                                 y_hat,
+                                                 output_dict=True,
+                                                 zero_division=0)
+            model_summary = class_report["weighted avg"]
+            model_summary["accuracy"] = class_report["accuracy"]
+            model_summary = {key: round(model_summary[key], 3)
+                             for key in model_summary}
+        self.model_summary = model_summary
+
+    def _fit(self) -> Dict[str, Any]:
+        """Fit XGBoost model."""
+        if self.method == "classify":
+            tmp_model = xgb.XGBClassifier(n_jobs=-1,
+                                         verbosity=0,
+                                         silent=True,
+                                         random_state=self.seed,
+                                         seed=self.seed)
+        elif self.method == "regression":
+            tmp_model = xgb.XGBRegressor(n_jobs=-1,
+                                         verbosity=0,
+                                         silent=True,
+                                         random_state=self.seed,
+                                         seed=self.seed)
+        gs = GridSearchCV(estimator=tmp_model,
+                          param_grid=self.param,
+                          n_jobs=-1,
+                          verbose=0,
+                          refit=True,
+                          return_train_score=True,
+                          cv=self.k_fold)
+        gs_op = gs.fit(self.df[self.x_var],
+                       self.df[self.y_var])
+        self.model = gs_op
+        return gs_op.best_params_
+
+    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
+        """Predict values."""
+        df_op = x_predict.copy(deep=True)
+        y_hat = self.model.predict(x_predict)
+        df_op.insert(loc=0, column=self.y_var, value=y_hat)
+        return df_op
diff --git a/tests/test_xgboost.py b/tests/test_xgboost.py
new file mode 100644
index 0000000..d155db0
--- /dev/null
+++ b/tests/test_xgboost.py
@@ -0,0 +1,105 @@
+"""
+Test suite module for ``XGBoost``.
+
+Credits
+-------
+::
+
+    Authors:
+        - Diptesh
+        - Madhu
+
+    Date: Sep 27, 2021
+"""
+
+# pylint: disable=invalid-name
+# pylint: disable=wrong-import-position
+
+import unittest
+import warnings
+import re
+import sys
+
+from inspect import getsourcefile
+from os.path import abspath
+
+import pandas as pd
+
+from sklearn.model_selection import train_test_split as split
+from sklearn import metrics as sk_metrics
+
+# Set base path
+path = abspath(getsourcefile(lambda: 0))
+path = re.sub(r"(.+)(\/tests.*)", "\\1", path)
+
+sys.path.insert(0, path)
+
+from mllib.lib.xgboost import XGBoost  # noqa: F841
+
+# =============================================================================
+# --- DO NOT CHANGE ANYTHING FROM HERE
+# =============================================================================
+
+path = path + "/data/input/"
+
+# =============================================================================
+# --- User defined functions
+# =============================================================================
+
+
+def ignore_warnings(test_func):
+    """Suppress warnings."""
+
+    def do_test(self, *args, **kwargs):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            test_func(self, *args, **kwargs)
+    return do_test
+
+
+class Test_XGBoost(unittest.TestCase):
+    """Test suite for module ``XGBoost``."""
+
+    def setUp(self):
+        """Set up for module ``XGBoost``."""
+
+    def test_rf_class(self):
+        """XGBoost: Test for classification."""
+        x_var = ["x1", "x2", "x3", "x4"]
+        y_var = "y"
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[[y_var] + x_var]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip[y_var],
+                                  test_size=0.2,
+                                  random_state=42)
+        mod = XGBoost(df_train, y_var, x_var, method="classify")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
+        acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
+        self.assertGreaterEqual(acc, 0.93)
+
+    @ignore_warnings
+    def test_knn_reg(self):
+        """XGBoost: Test for regression."""
+        x_var = ["x1", "x2", "x3", "x4"]
+        y_var = "y"
+        df_ip = pd.read_csv(path + "iris.csv")
+        df_ip = df_ip[[y_var] + x_var]
+        df_train, df_test = split(df_ip,
+                                  stratify=df_ip[y_var],
+                                  test_size=0.2,
+                                  random_state=42)
+        mod = XGBoost(df_train, y_var, x_var, method="regression")
+        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
+        y = df_test[y_var].values.tolist()
+        mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
+        self.assertLessEqual(mse, 0.1)
+
+
+# =============================================================================
+# --- Main
+# =============================================================================
+
+if __name__ == '__main__':
+    unittest.main()

From 352c968d165a4a3fee63bffcdaf86983addd938d Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Mon, 27 Sep 2021 23:04:38 +0530
Subject: [PATCH 3/4] v0.4.0

changelog:
- added XGBoost module with associated tests
---
 logs/cov.out                             |  3 +-
 logs/pylint/lib-boost-py.out             |  9 ++++
 logs/pylint/tests-test_boost-py.out      |  4 ++
 mllib/lib/{xgboost.py => boost.py}       | 52 ++++++++++++------------
 requirements.txt                         |  3 +-
 tests/{test_xgboost.py => test_boost.py} | 21 ++++++----
 tests/test_random_forest.py              |  8 ++--
 7 files changed, 61 insertions(+), 39 deletions(-)
 create mode 100644 logs/pylint/lib-boost-py.out
 create mode 100644 logs/pylint/tests-test_boost-py.out
 rename mllib/lib/{xgboost.py => boost.py} (77%)
 rename tests/{test_xgboost.py => test_boost.py} (82%)

diff --git a/logs/cov.out b/logs/cov.out
index 4dbc976..5117727 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -2,9 +2,10 @@ Name                                                             Stmts   Miss  C
 ----------------------------------------------------------------------------------------------
 /media/ph33r/Data/Project/mllib/Git/mllib/__init__.py                7      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py            7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/boost.py              53      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py           103      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py                70      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py              44      0   100%
 /media/ph33r/Data/Project/mllib/Git/mllib/lib/random_forest.py      61      0   100%
 ----------------------------------------------------------------------------------------------
-TOTAL                                                              292      0   100%
+TOTAL                                                              345      0   100%
diff --git a/logs/pylint/lib-boost-py.out b/logs/pylint/lib-boost-py.out
new file mode 100644
index 0000000..aed4fa6
--- /dev/null
+++ b/logs/pylint/lib-boost-py.out
@@ -0,0 +1,9 @@
+************* Module mllib.lib.boost
+boost.py:152:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+boost.py:153:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+boost.py:154:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+boost.py:155:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+
diff --git a/logs/pylint/tests-test_boost-py.out b/logs/pylint/tests-test_boost-py.out
new file mode 100644
index 0000000..d7495ee
--- /dev/null
+++ b/logs/pylint/tests-test_boost-py.out
@@ -0,0 +1,4 @@
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+
diff --git a/mllib/lib/xgboost.py b/mllib/lib/boost.py
similarity index 77%
rename from mllib/lib/xgboost.py
rename to mllib/lib/boost.py
index 9d82dff..64dd6cd 100644
--- a/mllib/lib/xgboost.py
+++ b/mllib/lib/boost.py
@@ -1,5 +1,5 @@
 """
-Random Forest module.
+XGBoost module.
 
 **Available routines:**
 
@@ -30,7 +30,7 @@
 import numpy as np
 import xgboost as xgb
 
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import RandomizedSearchCV
 from sklearn.metrics import classification_report
 
 path = abspath(getsourcefile(lambda: 0))
@@ -41,11 +41,11 @@
 
 
 class XGBoost():
-    """Random forest module.
+    """XGBoost module.
 
     Objective:
         - Build
-          `XGBoost < https://en.wikipedia.org/wiki/XGBoost >'
+          `XGBoost <https://en.wikipedia.org/wiki/XGBoost>`_
           model and determine optimal k
 
     Parameters
@@ -81,8 +81,8 @@ class XGBoost():
             colsample_bytree: [i/10 for i in range(1, 11)]
             min_child_weight: list(range(1, 11))
             max_depth: [1, len(x_var)]
-            gamma: list(range(1, len(x_var)))
-            objective: ["reg:squarederror"]
+            gamma: list(np.arange(0.0, 1.1, 0.25))
+            objective: ["reg:squarederror", "binary:logistic"]
 
     Returns
     -------
@@ -126,15 +126,15 @@ def __init__(self,
         self.model = None
         self.k_fold = k_fold
         self.seed = 1
-        if param is None:
+        if param is None:  # pragma: no cover
             param = {"n_estimators": [1000],
-                           "learning_rate": [i/1000 for i in range(2, 11)],
-                           "subsample": [i/10 for i in range(5, 10)],
-                           "colsample_bytree": [i/10 for i in range(1, 11)],
-                           "min_child_weight": list(range(1, 11)),
-                           "max_depth": list(range(1, len(x_var))),
-                           "gamma": list(range(0, 21)),
-                           }
+                     "learning_rate": [0.001, 0.01, 0.1, 0.2, 0.3],
+                     "subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+                     "colsample_bytree": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+                     "colsample_bylevel": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+                     "min_child_weight": [0.5, 1.0, 3.0, 5.0, 7.0, 10.0],
+                     "max_depth": list(range(1, len(x_var))),
+                     "gamma": [0, 0.25, 0.5, 1.0]}
             if method == "classify":
                 param["objective"] = ["binary:logistic"]
             elif method == "regression":
@@ -169,23 +169,25 @@ def _fit(self) -> Dict[str, Any]:
         """Fit XGBoost model."""
         if self.method == "classify":
             tmp_model = xgb.XGBClassifier(n_jobs=-1,
-                                         verbosity=0,
-                                         silent=True,
-                                         random_state=self.seed,
-                                         seed=self.seed)
+                                          verbosity=0,
+                                          silent=True,
+                                          random_state=self.seed,
+                                          seed=self.seed,
+                                          use_label_encoder=False)
         elif self.method == "regression":
             tmp_model = xgb.XGBRegressor(n_jobs=-1,
                                          verbosity=0,
                                          silent=True,
                                          random_state=self.seed,
                                          seed=self.seed)
-        gs = GridSearchCV(estimator=tmp_model,
-                          param_grid=self.param,
-                          n_jobs=-1,
-                          verbose=0,
-                          refit=True,
-                          return_train_score=True,
-                          cv=self.k_fold)
+        gs = RandomizedSearchCV(estimator=tmp_model,
+                                param_distributions=self.param,
+                                n_jobs=-1,
+                                verbose=0,
+                                refit=True,
+                                return_train_score=True,
+                                cv=self.k_fold,
+                                random_state=self.seed)
         gs_op = gs.fit(self.df[self.x_var],
                        self.df[self.y_var])
         self.model = gs_op
diff --git a/requirements.txt b/requirements.txt
index 86af4d0..0070676 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 pandas==1.1.3
-numpy==1.19.5
 Cython==0.29.15
+xgboost==1.3.3
+numpy==1.19.5
 scikit_learn==1.0
diff --git a/tests/test_xgboost.py b/tests/test_boost.py
similarity index 82%
rename from tests/test_xgboost.py
rename to tests/test_boost.py
index d155db0..2532c37 100644
--- a/tests/test_xgboost.py
+++ b/tests/test_boost.py
@@ -34,7 +34,7 @@
 
 sys.path.insert(0, path)
 
-from mllib.lib.xgboost import XGBoost  # noqa: F841
+from mllib.lib.boost import XGBoost  # noqa: F841
 
 # =============================================================================
 # --- DO NOT CHANGE ANYTHING FROM HERE
@@ -63,7 +63,8 @@ class Test_XGBoost(unittest.TestCase):
     def setUp(self):
         """Set up for module ``XGBoost``."""
 
-    def test_rf_class(self):
+    @ignore_warnings
+    def test_xgboost_class(self):
         """XGBoost: Test for classification."""
         x_var = ["x1", "x2", "x3", "x4"]
         y_var = "y"
@@ -72,15 +73,17 @@ def test_rf_class(self):
         df_train, df_test = split(df_ip,
                                   stratify=df_ip[y_var],
                                   test_size=0.2,
-                                  random_state=42)
-        mod = XGBoost(df_train, y_var, x_var, method="classify")
+                                  random_state=1)
+        mod = XGBoost(df_train, y_var, x_var, method="classify",
+                      param={"n_estimators": [1],
+                             "objective": ["binary:logistic"]})
         y_hat = mod.predict(df_test[x_var])[y_var].tolist()
         y = df_test[y_var].values.tolist()
         acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
         self.assertGreaterEqual(acc, 0.93)
 
     @ignore_warnings
-    def test_knn_reg(self):
+    def test_xgboost_reg(self):
         """XGBoost: Test for regression."""
         x_var = ["x1", "x2", "x3", "x4"]
         y_var = "y"
@@ -89,12 +92,14 @@ def test_knn_reg(self):
         df_train, df_test = split(df_ip,
                                   stratify=df_ip[y_var],
                                   test_size=0.2,
-                                  random_state=42)
-        mod = XGBoost(df_train, y_var, x_var, method="regression")
+                                  random_state=1)
+        mod = XGBoost(df_train, y_var, x_var, method="regression",
+                      param={"n_estimators": [1],
+                             "objective": ["reg:squarederror"]})
         y_hat = mod.predict(df_test[x_var])[y_var].tolist()
         y = df_test[y_var].values.tolist()
         mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
-        self.assertLessEqual(mse, 0.1)
+        self.assertLessEqual(mse, 0.5)
 
 
 # =============================================================================
diff --git a/tests/test_random_forest.py b/tests/test_random_forest.py
index f0127f3..54bb0f3 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_random_forest.py
@@ -57,11 +57,11 @@ def do_test(self, *args, **kwargs):
     return do_test
 
 
-class Test_Knn(unittest.TestCase):
-    """Test suite for module ``KNN``."""
+class Test_RandomForest(unittest.TestCase):
+    """Test suite for module ``RandomForest``."""
 
     def setUp(self):
-        """Set up for module ``KNN``."""
+        """Set up for module ``RandomForest``."""
 
     def test_rf_class(self):
         """RandomForest: Test for classification."""
@@ -80,7 +80,7 @@ def test_rf_class(self):
         self.assertGreaterEqual(acc, 0.93)
 
     @ignore_warnings
-    def test_knn_reg(self):
+    def test_rf_reg(self):
         """RandomForest: Test for regression."""
         x_var = ["x1", "x2", "x3", "x4"]
         y_var = "y"

From af7cfa0ba59eca9470fa47d5d52377ce462fd49b Mon Sep 17 00:00:00 2001
From: Diptesh Basak <bdiptesh@gmail.com>
Date: Tue, 28 Sep 2021 02:09:09 +0530
Subject: [PATCH 4/4] v0.4.0

changelog:
- combined RandomForest and XGBoost into a single module named tree
---
 logs/cov.out                                  |  21 +-
 logs/pylint/lib-boost-py.out                  |   9 -
 logs/pylint/lib-glmnet_ts-py.out              |   9 -
 logs/pylint/lib-random_forest-py.out          |   9 -
 logs/pylint/lib-tree-py.out                   |   9 +
 logs/pylint/tests-test_boost-py.out           |   4 -
 logs/pylint/tests-test_glmnet_ts-py.out       |   4 -
 logs/pylint/tests-test_random_forest-py.out   |   4 -
 ...b-glmnet-py.out => tests-test_tree-py.out} |   0
 mllib/__main__.py                             |  24 ++
 mllib/lib/knn.py                              |   2 +-
 mllib/lib/random_forest.py                    | 187 -------------
 mllib/lib/{boost.py => tree.py}               | 254 +++++++++++++-----
 requirements.txt                              |   4 +-
 tests/test_boost.py                           | 110 --------
 tests/{test_random_forest.py => test_tree.py} |  46 +++-
 16 files changed, 271 insertions(+), 425 deletions(-)
 delete mode 100644 logs/pylint/lib-boost-py.out
 delete mode 100644 logs/pylint/lib-glmnet_ts-py.out
 delete mode 100644 logs/pylint/lib-random_forest-py.out
 create mode 100644 logs/pylint/lib-tree-py.out
 delete mode 100644 logs/pylint/tests-test_boost-py.out
 delete mode 100644 logs/pylint/tests-test_glmnet_ts-py.out
 delete mode 100644 logs/pylint/tests-test_random_forest-py.out
 rename logs/pylint/{lib-glmnet-py.out => tests-test_tree-py.out} (100%)
 delete mode 100644 mllib/lib/random_forest.py
 rename mllib/lib/{boost.py => tree.py} (55%)
 delete mode 100644 tests/test_boost.py
 rename tests/{test_random_forest.py => test_tree.py} (63%)

diff --git a/logs/cov.out b/logs/cov.out
index 5117727..eed6fca 100644
--- a/logs/cov.out
+++ b/logs/cov.out
@@ -1,11 +1,10 @@
-Name                                                             Stmts   Miss  Cover   Missing
-----------------------------------------------------------------------------------------------
-/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py                7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py            7      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/boost.py              53      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py           103      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py                70      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py              44      0   100%
-/media/ph33r/Data/Project/mllib/Git/mllib/lib/random_forest.py      61      0   100%
-----------------------------------------------------------------------------------------------
-TOTAL                                                              345      0   100%
+Name                                                        Stmts   Miss  Cover   Missing
+-----------------------------------------------------------------------------------------
+/media/ph33r/Data/Project/mllib/Git/mllib/__init__.py           7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/__init__.py       7      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/cluster.py      103      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/knn.py           70      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/model.py         44      0   100%
+/media/ph33r/Data/Project/mllib/Git/mllib/lib/tree.py          79      0   100%
+-----------------------------------------------------------------------------------------
+TOTAL                                                         310      0   100%
diff --git a/logs/pylint/lib-boost-py.out b/logs/pylint/lib-boost-py.out
deleted file mode 100644
index aed4fa6..0000000
--- a/logs/pylint/lib-boost-py.out
+++ /dev/null
@@ -1,9 +0,0 @@
-************* Module mllib.lib.boost
-boost.py:152:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-boost.py:153:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-boost.py:154:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-boost.py:155:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/lib-glmnet_ts-py.out b/logs/pylint/lib-glmnet_ts-py.out
deleted file mode 100644
index 77fd809..0000000
--- a/logs/pylint/lib-glmnet_ts-py.out
+++ /dev/null
@@ -1,9 +0,0 @@
-************* Module mllib.lib.glmnet_ts
-glmnet_ts.py:238:41: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-glmnet_ts.py:239:41: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-glmnet_ts.py:240:42: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-glmnet_ts.py:241:42: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/lib-random_forest-py.out b/logs/pylint/lib-random_forest-py.out
deleted file mode 100644
index ade91ae..0000000
--- a/logs/pylint/lib-random_forest-py.out
+++ /dev/null
@@ -1,9 +0,0 @@
-************* Module mllib.lib.random_forest
-random_forest.py:146:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-random_forest.py:147:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-random_forest.py:148:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-random_forest.py:149:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/lib-tree-py.out b/logs/pylint/lib-tree-py.out
new file mode 100644
index 0000000..1b9facf
--- /dev/null
+++ b/logs/pylint/lib-tree-py.out
@@ -0,0 +1,9 @@
+************* Module mllib.lib.tree
+tree.py:73:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+tree.py:74:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+tree.py:75:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+tree.py:76:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member)
+
+--------------------------------------------------------------------
+Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
+
diff --git a/logs/pylint/tests-test_boost-py.out b/logs/pylint/tests-test_boost-py.out
deleted file mode 100644
index d7495ee..0000000
--- a/logs/pylint/tests-test_boost-py.out
+++ /dev/null
@@ -1,4 +0,0 @@
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/tests-test_glmnet_ts-py.out b/logs/pylint/tests-test_glmnet_ts-py.out
deleted file mode 100644
index d7495ee..0000000
--- a/logs/pylint/tests-test_glmnet_ts-py.out
+++ /dev/null
@@ -1,4 +0,0 @@
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/tests-test_random_forest-py.out b/logs/pylint/tests-test_random_forest-py.out
deleted file mode 100644
index d7495ee..0000000
--- a/logs/pylint/tests-test_random_forest-py.out
+++ /dev/null
@@ -1,4 +0,0 @@
-
---------------------------------------------------------------------
-Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)
-
diff --git a/logs/pylint/lib-glmnet-py.out b/logs/pylint/tests-test_tree-py.out
similarity index 100%
rename from logs/pylint/lib-glmnet-py.out
rename to logs/pylint/tests-test_tree-py.out
diff --git a/mllib/__main__.py b/mllib/__main__.py
index 7cbcca8..d5bc8be 100644
--- a/mllib/__main__.py
+++ b/mllib/__main__.py
@@ -29,6 +29,8 @@
 from lib.cluster import Cluster  # noqa: F841
 from lib.model import GLMNet  # noqa: F841
 from lib.knn import KNN  # noqa: F841
+from lib.tree import RandomForest  # noqa: F841
+from lib.tree import XGBoost  # noqa: F841
 
 # =============================================================================
 # --- DO NOT CHANGE ANYTHING FROM HERE
@@ -92,6 +94,28 @@
     df_ip = pd.read_csv(path + "input/iris.csv")
     mod = KNN(df_ip, "y", ["x1", "x2", "x3", "x4"], method="classify")
     print("\nKNN\n")
+    for k, v in mod.model_summary.items():
+        print(k, str(v).rjust(69 - len(k)))
+    print(elapsed_time("Time", start_t),
+          sep="\n")
+    # --- Random forest
+    start_t = time.time_ns()
+    df_ip = pd.read_csv(path + "input/iris.csv")
+    x_var = ["x1", "x2", "x3", "x4"]
+    y_var = "y"
+    mod = RandomForest(df_ip, y_var, x_var, method="classify")
+    print("\nRandom forest\n")
+    for k, v in mod.model_summary.items():
+        print(k, str(v).rjust(69 - len(k)))
+    print(elapsed_time("Time", start_t),
+          sep="\n")
+    # --- XGBoost
+    start_t = time.time_ns()
+    df_ip = pd.read_csv(path + "input/iris.csv")
+    x_var = ["x1", "x2", "x3", "x4"]
+    y_var = "y"
+    mod = XGBoost(df_ip, y_var, x_var, method="classify")
+    print("\nXGBoost\n")
     for k, v in mod.model_summary.items():
         print(k, str(v).rjust(69 - len(k)))
     print(elapsed_time("Time", start_t),
diff --git a/mllib/lib/knn.py b/mllib/lib/knn.py
index ba5d08d..7483e22 100644
--- a/mllib/lib/knn.py
+++ b/mllib/lib/knn.py
@@ -104,7 +104,7 @@ class KNN():
     Example
     -------
     >>> mod = KNN(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
-    >>> df_op = mod.predict(df_predict)
+    >>> df_op = mod.predict(x_predict)
 
     """
 
diff --git a/mllib/lib/random_forest.py b/mllib/lib/random_forest.py
deleted file mode 100644
index 33685ab..0000000
--- a/mllib/lib/random_forest.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-Random Forest module.
-
-**Available routines:**
-
-- class ``RandomForest``: Builds Random Forest model using cross validation.
-
-Credits
--------
-::
-
-    Authors:
-        - Diptesh
-        - Madhu
-
-    Date: Sep 27, 2021
-"""
-
-# pylint: disable=invalid-name
-# pylint: disable=R0902,R0903,R0913,C0413
-
-from typing import List, Dict, Any
-
-import re
-import sys
-from inspect import getsourcefile
-from os.path import abspath
-
-import pandas as pd
-import numpy as np
-import sklearn.ensemble as rf
-
-from sklearn.model_selection import GridSearchCV
-from sklearn.metrics import classification_report
-
-path = abspath(getsourcefile(lambda: 0))
-path = re.sub(r"(.+\/)(.+.py)", "\\1", path)
-sys.path.insert(0, path)
-
-import metrics  # noqa: F841
-
-
-class RandomForest():
-    """Random forest module.
-
-    Objective:
-        - Build
-          `Random forest <https://en.wikipedia.org/wiki/Random_forest>`_
-          model and determine optimal k
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-
-        Pandas dataframe containing the `y_var` and `x_var`
-
-    y_var : str
-
-        Dependant variable
-
-    x_var : List[str]
-
-        Independant variables
-
-    method : str, optional
-
-        Can be either `classify` or `regression` (the default is regression)
-
-    k_fold : int, optional
-
-        Number of cross validations folds (the default is 5)
-
-    param : dict, optional
-
-        Random forest parameters (the default is None).
-        In case of None, the parameters will default to::
-
-            bootstrap: [True]
-            max_depth: [1, len(x_var)]
-            n_estimators: [1000]
-            max_features: ["sqrt", "auto"]
-            min_samples_leaf: [2, 5]
-
-    Returns
-    -------
-    model : object
-
-        Final optimal model.
-
-    best_params_ : Dict
-
-        Best parameters amongst the given parameters.
-
-    model_summary : Dict
-
-        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
-        MAPE for regression and Accuracy, Precision, Recall, F1 score for
-        classification.
-
-    Methods
-    -------
-    predict
-
-    Example
-    -------
-    >>> mod = RandomForest(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
-    >>> df_op = mod.predict(df_predict)
-
-    """
-
-    def __init__(self,
-                 df: pd.DataFrame,
-                 y_var: str,
-                 x_var: List[str],
-                 method: str = "regression",
-                 k_fold: int = 5,
-                 param: Dict = None):
-        """Initialize variables for module ``RandomForest``."""
-        self.y_var = y_var
-        self.x_var = x_var
-        self.df = df.reset_index(drop=True)
-        self.method = method
-        self.model = None
-        self.k_fold = k_fold
-        self.seed = 1
-        if param is None:
-            param = {"bootstrap": [True],
-                     "max_depth": list(range(1, len(x_var))),
-                     "n_estimators": [1000]}
-            if method == "classify":
-                param["max_features"] = ["sqrt"]
-                param["min_samples_leaf"] = [2]
-            elif method == "regression":
-                param["max_features"] = [int(len(x_var) / 3)]
-                param["min_samples_leaf"] = [5]
-        self.param = param
-        self.best_params_ = self._fit()
-        self.model_summary = None
-        self._compute_metrics()
-
-    def _compute_metrics(self):
-        """Compute commonly used metrics to evaluate the model."""
-        y = self.df.loc[:, self.y_var].values.tolist()
-        y_hat = list(self.model.predict(self.df[self.x_var]))
-        if self.method == "regression":
-            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
-                             "mae": np.round(metrics.mae(y, y_hat), 3),
-                             "mape": np.round(metrics.mape(y, y_hat), 3),
-                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
-            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
-        if self.method == "classify":
-            class_report = classification_report(y,
-                                                 y_hat,
-                                                 output_dict=True,
-                                                 zero_division=0)
-            model_summary = class_report["weighted avg"]
-            model_summary["accuracy"] = class_report["accuracy"]
-            model_summary = {key: round(model_summary[key], 3)
-                             for key in model_summary}
-        self.model_summary = model_summary
-
-    def _fit(self) -> Dict[str, Any]:
-        """Fit RandomForest model."""
-        if self.method == "classify":
-            tmp_model = rf.RandomForestClassifier(oob_score=True,
-                                                  random_state=self.seed)
-        elif self.method == "regression":
-            tmp_model = rf.RandomForestRegressor(oob_score=True,
-                                                 random_state=self.seed)
-        gs = GridSearchCV(estimator=tmp_model,
-                          param_grid=self.param,
-                          n_jobs=-1,
-                          verbose=0,
-                          refit=True,
-                          return_train_score=True,
-                          cv=self.k_fold)
-        gs_op = gs.fit(self.df[self.x_var],
-                       self.df[self.y_var])
-        self.model = gs_op
-        return gs_op.best_params_
-
-    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
-        """Predict values."""
-        df_op = x_predict.copy(deep=True)
-        y_hat = self.model.predict(x_predict)
-        df_op.insert(loc=0, column=self.y_var, value=y_hat)
-        return df_op
diff --git a/mllib/lib/boost.py b/mllib/lib/tree.py
similarity index 55%
rename from mllib/lib/boost.py
rename to mllib/lib/tree.py
index 64dd6cd..5a33fb9 100644
--- a/mllib/lib/boost.py
+++ b/mllib/lib/tree.py
@@ -1,8 +1,9 @@
 """
-XGBoost module.
+Tree based models.
 
 **Available routines:**
 
+- class ``RandomForest``: Builds Random Forest model using cross validation.
 - class ``XGBoost``: Builds XGBoost model using cross validation.
 
 Credits
@@ -28,6 +29,7 @@
 
 import pandas as pd
 import numpy as np
+import sklearn.ensemble as rf
 import xgboost as xgb
 
 from sklearn.model_selection import RandomizedSearchCV
@@ -40,7 +42,163 @@
 import metrics  # noqa: F841
 
 
-class XGBoost():
+class Tree():
+    """Parent class for tree based models."""
+
+    def __init__(self,
+                 df: pd.DataFrame,
+                 y_var: str,
+                 x_var: List[str],
+                 method: str = "regression",
+                 k_fold: int = 5,
+                 param: Dict = None):
+        """Initialize variables."""
+        self.y_var = y_var
+        self.x_var = x_var
+        self.df = df.reset_index(drop=True)
+        self.method = method
+        self.k_fold = k_fold
+        self.seed = 1
+        self.model = None
+        self.model_summary = None
+        self.param = param
+        self.best_params_ = self._fit()
+        self._compute_metrics()
+
+    def _compute_metrics(self):
+        """Compute commonly used metrics to evaluate the model."""
+        y = self.df.loc[:, self.y_var].values.tolist()
+        y_hat = list(self.model.predict(self.df[self.x_var]))
+        if self.method == "regression":
+            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
+                             "mae": np.round(metrics.mae(y, y_hat), 3),
+                             "mape": np.round(metrics.mape(y, y_hat), 3),
+                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
+            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
+        if self.method == "classify":
+            class_report = classification_report(y,
+                                                 y_hat,
+                                                 output_dict=True,
+                                                 zero_division=0)
+            model_summary = class_report["weighted avg"]
+            model_summary["accuracy"] = class_report["accuracy"]
+            model_summary = {key: round(model_summary[key], 3)
+                             for key in model_summary}
+        self.model_summary = model_summary
+
+    def _fit(self) -> Dict[str, Any]:  # pragma: no cover
+        """Fit model."""
+        return self.best_params_
+
+    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
+        """Predict values."""
+        df_op = x_predict.copy(deep=True)
+        y_hat = self.model.predict(x_predict)
+        df_op.insert(loc=0, column=self.y_var, value=y_hat)
+        return df_op
+
+
+class RandomForest(Tree):
+    """Random forest module.
+
+    Objective:
+        - Build
+          `Random forest <https://en.wikipedia.org/wiki/Random_forest>`_
+          model and determine optimal hyperparameters
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+
+        Pandas dataframe containing the `y_var` and `x_var`
+
+    y_var : str
+
+        Dependent variable
+
+    x_var : List[str]
+
+        Independent variables
+
+    method : str, optional
+
+        Can be either `classify` or `regression` (the default is regression)
+
+    k_fold : int, optional
+
+        Number of cross-validation folds (the default is 5)
+
+    param : dict, optional
+
+        Random forest parameters (the default is None).
+        In case of None, the parameters will default to::
+
+            bootstrap: [True]
+            max_depth: list(range(1, len(x_var)))
+            n_estimators: [100]
+            max_features: ["sqrt", int(len(x_var) / 3)]
+            min_samples_leaf: [2, 5]
+
+    Returns
+    -------
+    model : object
+
+        Final optimal model.
+
+    best_params_ : Dict
+
+        Best parameters amongst the given parameters.
+
+    model_summary : Dict
+
+        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
+        MAPE for regression and Accuracy, Precision, Recall, F1 score for
+        classification.
+
+    Methods
+    -------
+    predict
+
+    Example
+    -------
+    >>> mod = RandomForest(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
+    >>> df_op = mod.predict(x_predict)
+
+    """
+
+    def _fit(self) -> Dict[str, Any]:
+        """Fit RandomForest model."""
+        if self.param is None:
+            self.param = {"bootstrap": [True],
+                          "max_depth": list(range(1, len(self.x_var))),
+                          "n_estimators": [100]}
+            if self.method == "classify":
+                self.param["max_features"] = ["sqrt"]
+                self.param["min_samples_leaf"] = [2]
+            elif self.method == "regression":
+                self.param["max_features"] = [int(len(self.x_var) / 3)]
+                self.param["min_samples_leaf"] = [5]
+        if self.method == "classify":
+            tmp_model = rf.RandomForestClassifier(oob_score=True,
+                                                  random_state=self.seed)
+        elif self.method == "regression":
+            tmp_model = rf.RandomForestRegressor(oob_score=True,
+                                                 random_state=self.seed)
+        gs = RandomizedSearchCV(estimator=tmp_model,
+                                param_distributions=self.param,
+                                n_jobs=-1,
+                                verbose=0,
+                                refit=True,
+                                n_iter=3,
+                                return_train_score=True,
+                                cv=self.k_fold)
+        gs_op = gs.fit(self.df[self.x_var],
+                       self.df[self.y_var])
+        self.model = gs_op
+        return gs_op.best_params_
+
+
+class XGBoost(Tree):
     """XGBoost module.
 
     Objective:
@@ -75,13 +233,12 @@ class XGBoost():
         XGBoost parameters (the default is None).
         In case of None, the parameters will default to::
 
-            n_estimators: [1000]
-            learning_rate: [i/1000 for i in range(2, 11)]
-            subsample: [i/10 for i in range(5, 10)]
-            colsample_bytree: [i/10 for i in range(1, 11)]
-            min_child_weight: list(range(1, 11))
-            max_depth: [1, len(x_var)]
-            gamma: list(np.arange(0.0, 1.1, 0.25))
+            n_estimators: [100]
+            learning_rate: [0.01, 0.1, 0.2, 0.3]
+            subsample: [0.5, 0.75, 1.0]
+            colsample_bytree: [0.5, 1.0]
+            min_child_weight: [0.5, 1.0, 3.0]
+            max_depth: [int(len(self.x_var) * 0.8)]
             objective: ["reg:squarederror", "binary:logistic"]
 
     Returns
@@ -107,75 +264,32 @@ class XGBoost():
     Example
     -------
     >>> mod = XGBoost(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
-    >>> df_op = mod.predict(df_predict)
+    >>> df_op = mod.predict(x_predict)
 
     """
 
-    def __init__(self,
-                 df: pd.DataFrame,
-                 y_var: str,
-                 x_var: List[str],
-                 method: str = "regression",
-                 k_fold: int = 5,
-                 param: Dict = None):
-        """Initialize variables for module ``XGBoost``."""
-        self.y_var = y_var
-        self.x_var = x_var
-        self.df = df.reset_index(drop=True)
-        self.method = method
-        self.model = None
-        self.k_fold = k_fold
-        self.seed = 1
-        if param is None:  # pragma: no cover
-            param = {"n_estimators": [1000],
-                     "learning_rate": [0.001, 0.01, 0.1, 0.2, 0.3],
-                     "subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-                     "colsample_bytree": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-                     "colsample_bylevel": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-                     "min_child_weight": [0.5, 1.0, 3.0, 5.0, 7.0, 10.0],
-                     "max_depth": list(range(1, len(x_var))),
-                     "gamma": [0, 0.25, 0.5, 1.0]}
-            if method == "classify":
-                param["objective"] = ["binary:logistic"]
-            elif method == "regression":
-                param["objective"] = ["reg:squarederror"]
-        self.param = param
-        self.best_params_ = self._fit()
-        self.model_summary = None
-        self._compute_metrics()
-
-    def _compute_metrics(self):
-        """Compute commonly used metrics to evaluate the model."""
-        y = self.df.loc[:, self.y_var].values.tolist()
-        y_hat = list(self.model.predict(self.df[self.x_var]))
-        if self.method == "regression":
-            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
-                             "mae": np.round(metrics.mae(y, y_hat), 3),
-                             "mape": np.round(metrics.mape(y, y_hat), 3),
-                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
-            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
-        if self.method == "classify":
-            class_report = classification_report(y,
-                                                 y_hat,
-                                                 output_dict=True,
-                                                 zero_division=0)
-            model_summary = class_report["weighted avg"]
-            model_summary["accuracy"] = class_report["accuracy"]
-            model_summary = {key: round(model_summary[key], 3)
-                             for key in model_summary}
-        self.model_summary = model_summary
-
     def _fit(self) -> Dict[str, Any]:
         """Fit XGBoost model."""
+        if self.param is None:
+            self.param = {"n_estimators": [100],
+                          "learning_rate": [0.01, 0.1, 0.2, 0.3],
+                          "subsample": [0.5, 0.75, 1.0],
+                          "colsample_bytree": [0.5, 1.0],
+                          "min_child_weight": [0.5, 1.0, 3.0],
+                          "max_depth": [int(len(self.x_var) * 0.8)]}
+            if self.method == "classify":
+                self.param["objective"] = ["binary:logistic"]
+            elif self.method == "regression":
+                self.param["objective"] = ["reg:squarederror"]
         if self.method == "classify":
-            tmp_model = xgb.XGBClassifier(n_jobs=-1,
+            tmp_model = xgb.XGBClassifier(n_jobs=1,
                                           verbosity=0,
                                           silent=True,
                                           random_state=self.seed,
                                           seed=self.seed,
                                           use_label_encoder=False)
         elif self.method == "regression":
-            tmp_model = xgb.XGBRegressor(n_jobs=-1,
+            tmp_model = xgb.XGBRegressor(n_jobs=1,
                                          verbosity=0,
                                          silent=True,
                                          random_state=self.seed,
@@ -185,6 +299,7 @@ def _fit(self) -> Dict[str, Any]:
                                 n_jobs=-1,
                                 verbose=0,
                                 refit=True,
+                                n_iter=10,
                                 return_train_score=True,
                                 cv=self.k_fold,
                                 random_state=self.seed)
@@ -192,10 +307,3 @@ def _fit(self) -> Dict[str, Any]:
                        self.df[self.y_var])
         self.model = gs_op
         return gs_op.best_params_
-
-    def predict(self, x_predict: pd.DataFrame) -> pd.DataFrame:
-        """Predict values."""
-        df_op = x_predict.copy(deep=True)
-        y_hat = self.model.predict(x_predict)
-        df_op.insert(loc=0, column=self.y_var, value=y_hat)
-        return df_op
diff --git a/requirements.txt b/requirements.txt
index 0070676..7715ab9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-pandas==1.1.3
 Cython==0.29.15
-xgboost==1.3.3
 numpy==1.19.5
+pandas==1.1.3
+xgboost==1.3.3
 scikit_learn==1.0
diff --git a/tests/test_boost.py b/tests/test_boost.py
deleted file mode 100644
index 2532c37..0000000
--- a/tests/test_boost.py
+++ /dev/null
@@ -1,110 +0,0 @@
-"""
-Test suite module for ``XGBoost``.
-
-Credits
--------
-::
-
-    Authors:
-        - Diptesh
-        - Madhu
-
-    Date: Sep 27, 2021
-"""
-
-# pylint: disable=invalid-name
-# pylint: disable=wrong-import-position
-
-import unittest
-import warnings
-import re
-import sys
-
-from inspect import getsourcefile
-from os.path import abspath
-
-import pandas as pd
-
-from sklearn.model_selection import train_test_split as split
-from sklearn import metrics as sk_metrics
-
-# Set base path
-path = abspath(getsourcefile(lambda: 0))
-path = re.sub(r"(.+)(\/tests.*)", "\\1", path)
-
-sys.path.insert(0, path)
-
-from mllib.lib.boost import XGBoost  # noqa: F841
-
-# =============================================================================
-# --- DO NOT CHANGE ANYTHING FROM HERE
-# =============================================================================
-
-path = path + "/data/input/"
-
-# =============================================================================
-# --- User defined functions
-# =============================================================================
-
-
-def ignore_warnings(test_func):
-    """Suppress warnings."""
-
-    def do_test(self, *args, **kwargs):
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            test_func(self, *args, **kwargs)
-    return do_test
-
-
-class Test_XGBoost(unittest.TestCase):
-    """Test suite for module ``XGBoost``."""
-
-    def setUp(self):
-        """Set up for module ``XGBoost``."""
-
-    @ignore_warnings
-    def test_xgboost_class(self):
-        """XGBoost: Test for classification."""
-        x_var = ["x1", "x2", "x3", "x4"]
-        y_var = "y"
-        df_ip = pd.read_csv(path + "iris.csv")
-        df_ip = df_ip[[y_var] + x_var]
-        df_train, df_test = split(df_ip,
-                                  stratify=df_ip[y_var],
-                                  test_size=0.2,
-                                  random_state=1)
-        mod = XGBoost(df_train, y_var, x_var, method="classify",
-                      param={"n_estimators": [1],
-                             "objective": ["binary:logistic"]})
-        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
-        y = df_test[y_var].values.tolist()
-        acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
-        self.assertGreaterEqual(acc, 0.93)
-
-    @ignore_warnings
-    def test_xgboost_reg(self):
-        """XGBoost: Test for regression."""
-        x_var = ["x1", "x2", "x3", "x4"]
-        y_var = "y"
-        df_ip = pd.read_csv(path + "iris.csv")
-        df_ip = df_ip[[y_var] + x_var]
-        df_train, df_test = split(df_ip,
-                                  stratify=df_ip[y_var],
-                                  test_size=0.2,
-                                  random_state=1)
-        mod = XGBoost(df_train, y_var, x_var, method="regression",
-                      param={"n_estimators": [1],
-                             "objective": ["reg:squarederror"]})
-        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
-        y = df_test[y_var].values.tolist()
-        mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
-        self.assertLessEqual(mse, 0.5)
-
-
-# =============================================================================
-# --- Main
-# =============================================================================
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/test_random_forest.py b/tests/test_tree.py
similarity index 63%
rename from tests/test_random_forest.py
rename to tests/test_tree.py
index 54bb0f3..5ae5fc2 100644
--- a/tests/test_random_forest.py
+++ b/tests/test_tree.py
@@ -1,5 +1,5 @@
 """
-Test suite module for ``random_forest``.
+Test suite module for ``tree`` (``RandomForest`` and ``XGBoost``).
 
 Credits
 -------
@@ -34,7 +34,8 @@
 
 sys.path.insert(0, path)
 
-from mllib.lib.random_forest import RandomForest  # noqa: F841
+from mllib.lib.tree import RandomForest  # noqa: F841
+from mllib.lib.tree import XGBoost  # noqa: F841
 
 # =============================================================================
 # --- DO NOT CHANGE ANYTHING FROM HERE
@@ -97,6 +98,47 @@ def test_rf_reg(self):
         self.assertLessEqual(mse, 0.1)
 
 
+class Test_XGBoost(unittest.TestCase):
+    """Unit tests covering the ``XGBoost`` model wrapper."""
+
+    def setUp(self):
+        """No shared fixtures are needed; each test loads its own data."""
+
+    @ignore_warnings
+    def test_xgboost_class(self):
+        """XGBoost: Test for classification."""
+        features, target = ["x1", "x2"], "y"
+        data = pd.read_csv(path + "iris.csv")[[target] + features]
+        # Stratified 80/20 split on the target, using a fixed seed.
+        train, holdout = split(data, stratify=data[target],
+                               test_size=0.2, random_state=1)
+        # No ``param`` is passed, so the model falls back to its defaults.
+        model = XGBoost(train, target, features, method="classify")
+        # The prediction frame is indexed by the target column name.
+        predicted = model.predict(holdout[features])[target].tolist()
+        actual = holdout[target].values.tolist()
+        # Round to two decimals before comparing against the threshold.
+        accuracy = round(sk_metrics.accuracy_score(actual, predicted), 2)
+        self.assertGreaterEqual(accuracy, 0.93)
+
+    @ignore_warnings
+    def test_xgboost_reg(self):
+        """XGBoost: Test for regression."""
+        features, target = ["x1", "x2", "x3", "x4"], "y"
+        data = pd.read_csv(path + "iris.csv")[[target] + features]
+        # Stratified 80/20 split on the target, using a fixed seed.
+        train, holdout = split(data, stratify=data[target],
+                               test_size=0.2, random_state=1)
+        # No ``param`` is passed, so the model falls back to its defaults.
+        model = XGBoost(train, target, features, method="regression")
+        # The prediction frame is indexed by the target column name.
+        predicted = model.predict(holdout[features])[target].tolist()
+        actual = holdout[target].values.tolist()
+        # Round to two decimals before comparing against the threshold.
+        mse = round(sk_metrics.mean_squared_error(actual, predicted), 2)
+        self.assertLessEqual(mse, 0.5)
+
+
 # =============================================================================
 # --- Main
 # =============================================================================