diff --git a/data/input/iris.csv b/data/input/iris.csv new file mode 100644 index 0000000..2e5cab0 --- /dev/null +++ b/data/input/iris.csv @@ -0,0 +1,151 @@ +x3,x4,x1,x2,x5,y +5.1,3.5,1.4,0.2,a,0 +4.9,3,1.4,0.2,a,0 +4.7,3.2,1.3,0.2,a,0 +4.6,3.1,1.5,0.2,a,0 +5,3.6,1.4,0.2,a,0 +5.4,3.9,1.7,0.4,a,0 +4.6,3.4,1.4,0.3,a,0 +5,3.4,1.5,0.2,a,0 +4.4,2.9,1.4,0.2,e,0 +4.9,3.1,1.5,0.1,e,0 +5.4,3.7,1.5,0.2,e,0 +4.8,3.4,1.6,0.2,e,0 +4.8,3,1.4,0.1,e,0 +4.3,3,1.1,0.1,e,0 +5.8,4,1.2,0.2,e,0 +5.7,4.4,1.5,0.4,e,0 +5.4,3.9,1.3,0.4,e,0 +5.1,3.5,1.4,0.3,e,0 +5.7,3.8,1.7,0.3,e,0 +5.1,3.8,1.5,0.3,s,0 +5.4,3.4,1.7,0.2,s,0 +5.1,3.7,1.5,0.4,s,0 +4.6,3.6,1,0.2,s,0 +5.1,3.3,1.7,0.5,s,0 +4.8,3.4,1.9,0.2,s,0 +5,3,1.6,0.2,s,0 +5,3.4,1.6,0.4,s,0 +5.2,3.5,1.5,0.2,s,0 +5.2,3.4,1.4,0.2,s,0 +4.7,3.2,1.6,0.2,s,0 +4.8,3.1,1.6,0.2,e,0 +5.4,3.4,1.5,0.4,s,0 +5.2,4.1,1.5,0.1,a,0 +5.5,4.2,1.4,0.2,s,0 +4.9,3.1,1.5,0.2,a,0 +5,3.2,1.2,0.2,s,0 +5.5,3.5,1.3,0.2,a,0 +4.9,3.6,1.4,0.1,e,0 +4.4,3,1.3,0.2,s,0 +5.1,3.4,1.5,0.2,a,0 +5,3.5,1.3,0.3,s,0 +4.5,2.3,1.3,0.3,e,0 +4.4,3.2,1.3,0.2,s,0 +5,3.5,1.6,0.6,s,0 +5.1,3.8,1.9,0.4,s,0 +4.8,3,1.4,0.3,s,0 +5.1,3.8,1.6,0.2,a,0 +4.6,3.2,1.4,0.2,a,0 +5.3,3.7,1.5,0.2,a,0 +5,3.3,1.4,0.2,a,0 +7,3.2,4.7,1.4,e,1 +6.4,3.2,4.5,1.5,e,1 +6.9,3.1,4.9,1.5,e,1 +5.5,2.3,4,1.3,e,1 +6.5,2.8,4.6,1.5,s,1 +5.7,2.8,4.5,1.3,e,1 +6.3,3.3,4.7,1.6,s,1 +4.9,2.4,3.3,1,a,1 +6.6,2.9,4.6,1.3,s,1 +5.2,2.7,3.9,1.4,e,1 +5,2,3.5,1,s,1 +5.9,3,4.2,1.5,a,1 +6,2.2,4,1,s,1 +6.1,2.9,4.7,1.4,e,1 +5.6,2.9,3.6,1.3,s,1 +6.7,3.1,4.4,1.4,a,1 +5.6,3,4.5,1.5,a,1 +5.8,2.7,4.1,1,s,1 +6.2,2.2,4.5,1.5,e,1 +5.6,2.5,3.9,1.1,a,1 +5.9,3.2,4.8,1.8,e,1 +6.1,2.8,4,1.3,e,1 +6.3,2.5,4.9,1.5,s,1 +6.1,2.8,4.7,1.2,e,1 +6.4,2.9,4.3,1.3,s,1 +6.6,3,4.4,1.4,a,1 +6.8,2.8,4.8,1.4,s,1 +6.7,3,5,1.7,e,1 +6,2.9,4.5,1.5,s,1 +5.7,2.6,3.5,1,a,1 +5.5,2.4,3.8,1.1,s,1 +5.5,2.4,3.7,1,e,1 +5.8,2.7,3.9,1.2,s,1 +6,2.7,5.1,1.6,e,1 +5.4,3,4.5,1.5,s,1 +6,3.4,4.5,1.6,a,1 +6.7,3.1,4.7,1.5,a,1 +6.3,2.3,4.4,1.3,s,1 +5.6,3,4.1,1.3,e,1 +5.5,2.5,4,1.3,a,1 
+5.5,2.6,4.4,1.2,e,1 +6.1,3,4.6,1.4,e,1 +5.8,2.6,4,1.2,s,1 +5,2.3,3.3,1,e,1 +5.6,2.7,4.2,1.3,s,1 +5.7,3,4.2,1.2,a,1 +5.7,2.9,4.2,1.3,s,1 +6.2,2.9,4.3,1.3,e,1 +5.1,2.5,3,1.1,s,1 +5.7,2.8,4.1,1.3,s,1 +6.3,3.3,6,2.5,s,2 +5.8,2.7,5.1,1.9,s,2 +7.1,3,5.9,2.1,a,2 +6.3,2.9,5.6,1.8,a,2 +6.5,3,5.8,2.2,a,2 +7.6,3,6.6,2.1,a,2 +4.9,2.5,4.5,1.7,e,2 +7.3,2.9,6.3,1.8,e,2 +6.7,2.5,5.8,1.8,e,2 +7.2,3.6,6.1,2.5,e,2 +6.5,3.2,5.1,2,s,2 +6.4,2.7,5.3,1.9,e,2 +6.8,3,5.5,2.1,s,2 +5.7,2.5,5,2,s,2 +5.8,2.8,5.1,2.4,e,2 +6.4,3.2,5.3,2.3,s,2 +6.5,3,5.5,1.8,a,2 +7.7,3.8,6.7,2.2,a,2 +7.7,2.6,6.9,2.3,s,2 +6,2.2,5,1.5,e,2 +6.9,3.2,5.7,2.3,s,2 +5.6,2.8,4.9,2,a,2 +7.7,2.8,6.7,2,s,2 +6.3,2.7,4.9,1.8,a,2 +6.7,3.3,5.7,2.1,s,2 +7.2,3.2,6,1.8,a,2 +6.2,2.8,4.8,1.8,s,2 +6.1,3,4.9,1.8,a,2 +6.4,2.8,5.6,2.1,s,2 +7.2,3,5.8,1.6,e,2 +7.4,2.8,6.1,1.9,e,2 +7.9,3.8,6.4,2,e,2 +6.4,2.8,5.6,2.2,e,2 +6.3,2.8,5.1,1.5,e,2 +6.1,2.6,5.6,1.4,s,2 +7.7,3,6.1,2.3,s,2 +6.3,3.4,5.6,2.4,s,2 +6.4,3.1,5.5,1.8,s,2 +6,3,4.8,1.8,s,2 +6.9,3.1,5.4,2.1,a,2 +6.7,3.1,5.6,2.4,a,2 +6.9,3.1,5.1,2.3,a,2 +5.8,2.7,5.1,1.9,a,2 +6.8,3.2,5.9,2.3,a,2 +6.7,3.3,5.7,2.5,s,2 +6.7,3,5.2,2.3,s,2 +6.3,2.5,5,1.9,s,2 +6.5,3,5.2,2,e,2 +6.2,3.4,5.4,2.3,e,2 +5.9,3,5.1,1.8,e,2 diff --git a/logs/cov.out b/logs/cov.out index 5aceeff..f0a3c4b 100644 --- a/logs/cov.out +++ b/logs/cov.out @@ -3,6 +3,7 @@ Name Stmts Miss Cover Missing mllib/__init__.py 7 0 100% mllib/lib/__init__.py 7 0 100% mllib/lib/cluster.py 103 0 100% -mllib/lib/model.py 45 0 100% +mllib/lib/knn.py 70 0 100% +mllib/lib/model.py 44 0 100% ----------------------------------------------------- -TOTAL 162 0 100% +TOTAL 231 0 100% diff --git a/logs/pylint/lib-knn-py.out b/logs/pylint/lib-knn-py.out new file mode 100644 index 0000000..ccf9413 --- /dev/null +++ b/logs/pylint/lib-knn-py.out @@ -0,0 +1,9 @@ +************* Module mllib.lib.knn +knn.py:176:45: I1101: Module 'metrics' has no 'rsq' member, but source is unavailable. 
Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member) +knn.py:177:45: I1101: Module 'metrics' has no 'mae' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member) +knn.py:178:46: I1101: Module 'metrics' has no 'mape' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member) +knn.py:179:46: I1101: Module 'metrics' has no 'rmse' member, but source is unavailable. Consider adding this module to extension-pkg-whitelist if you want to perform analysis based on run-time introspection of living objects. (c-extension-no-member) + +-------------------------------------------------------------------- +Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00) + diff --git a/logs/pylint/tests-test_knn-py.out b/logs/pylint/tests-test_knn-py.out new file mode 100644 index 0000000..d7495ee --- /dev/null +++ b/logs/pylint/tests-test_knn-py.out @@ -0,0 +1,4 @@ + +-------------------------------------------------------------------- +Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00) + diff --git a/mllib/__main__.py b/mllib/__main__.py index 2fbd285..7cbcca8 100644 --- a/mllib/__main__.py +++ b/mllib/__main__.py @@ -28,6 +28,7 @@ from lib import cfg, utils # noqa: F841 from lib.cluster import Cluster # noqa: F841 from lib.model import GLMNet # noqa: F841 +from lib.knn import KNN # noqa: F841 # ============================================================================= # --- DO NOT CHANGE ANYTHING FROM HERE @@ -52,13 +53,13 @@ CLI.add_argument("-f", "--filename", nargs=1, type=str, - default=["store.csv"], + default=["iris.csv"], help="input csv filename") args = 
class KNN():
    """K-Nearest Neighbour (KNN) module.

    Objective:
        - Build a
          `KNN <https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm>`_
          model and determine the optimal k

    Parameters
    ----------
    df : pandas.DataFrame

        Pandas dataframe containing the `y_var` and `x_var`

    y_var : str

        Dependent variable

    x_var : List[str]

        Independent variables

    method : str, optional

        Can be either `classify` or `regression` (the default is regression)

    k_fold : int, optional

        Number of cross validation folds (the default is 5)

    param : dict, optional

        KNN parameters (the default is None).
        In case of None, the parameters will default to::

            n_neighbors: odd values in [1, max(int(len(df)/(k_fold * 2)), 1)]
            weights: ["uniform", "distance"]
            metric: ["euclidean", "manhattan"]

    Returns
    -------
    model : object

        Final optimal model.

    best_params_ : Dict

        Best parameters amongst the given parameters.

    model_summary : Dict

        Model summary containing key metrics like R-squared, RMSE, MSE, MAE,
        MAPE for regression and Accuracy, Precision, Recall, F1 score for
        classification.

    Methods
    -------
    predict

    Example
    -------
    >>> mod = KNN(df=df_ip, y_var="y", x_var=["x1", "x2", "x3"])
    >>> df_op = mod.predict(df_predict)

    """

    def __init__(self,
                 df: pd.DataFrame,
                 y_var: str,
                 x_var: List[str],
                 method: str = "regression",
                 k_fold: int = 5,
                 param: Dict = None):
        """Initialize variables for module ``KNN``.

        Raises
        ------
        ValueError
            If `method` is neither `classify` nor `regression`.

        """
        # Fail fast on an invalid method; previously an unsupported value
        # only surfaced later as an UnboundLocalError inside ``_fit``.
        if method not in ("classify", "regression"):
            raise ValueError("method should be `classify` or `regression`")
        self.y_var = y_var
        self.x_var = x_var
        self.df = df.reset_index(drop=True)
        self.method = method
        self.model = None
        self.k_fold = k_fold
        if param is None:
            max_k = max(int(len(self.df) / (self.k_fold * 2)), 1)
            # Odd k values avoid ties when classifying. The bound is made
            # inclusive (max_k + 1) so the grid is never empty: with the
            # previous exclusive bound, max_k == 1 (small datasets) produced
            # an empty n_neighbors list and GridSearchCV raised an error.
            param = {"n_neighbors": list(range(1, max_k + 1, 2)),
                     "weights": ["uniform", "distance"],
                     "metric": ["euclidean", "manhattan"]}
        self.param = param
        self._pre_process()
        self.best_params_ = self._fit()
        self.model_summary = None
        self._compute_metrics()

    def _pre_process(self):
        """Pre-process the data, one hot encoding and normalizing."""
        # One hot encode categorical independent variables; the expanded
        # dummy columns become the effective `x_var` list from here on.
        df_ip_x = pd.get_dummies(self.df[self.x_var])
        self.x_var = list(df_ip_x.columns)
        # Fit the scaler on the training data so ``predict`` can apply the
        # identical min-max transformation to unseen data.
        self.norm = MinMaxScaler()
        self.norm.fit(df_ip_x)
        df_ip_x = pd.DataFrame(self.norm.transform(df_ip_x[self.x_var]))
        df_ip_x.columns = self.x_var
        self.df = self.df[[self.y_var]].join(df_ip_x)

    def _fit(self) -> Dict[str, Any]:
        """Fit the KNN model using grid search cross validation.

        Returns
        -------
        Dict[str, Any]

            Best parameters amongst the given parameters.

        """
        # `method` is validated in ``__init__``, so a two-way branch is safe.
        if self.method == "classify":
            estimator = sn.KNeighborsClassifier()
            scoring = "f1_weighted"
        else:
            estimator = sn.KNeighborsRegressor()
            scoring = "neg_root_mean_squared_error"
        gs = GridSearchCV(estimator=estimator,
                          param_grid=self.param,
                          scoring=scoring,
                          verbose=0,
                          refit=True,
                          return_train_score=True,
                          cv=self.k_fold,
                          n_jobs=-1)
        gs_op = gs.fit(self.df[self.x_var],
                       self.df[self.y_var])
        self.model = gs_op
        return gs_op.best_params_

    def _compute_metrics(self):
        """Compute commonly used metrics to evaluate the model."""
        y = self.df.loc[:, self.y_var].values.tolist()
        y_hat = list(self.model.predict(self.df[self.x_var]))
        if self.method == "regression":
            model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3),
                             "mae": np.round(metrics.mae(y, y_hat), 3),
                             "mape": np.round(metrics.mape(y, y_hat), 3),
                             "rmse": np.round(metrics.rmse(y, y_hat), 3)}
            model_summary["mse"] = np.round(model_summary["rmse"] ** 2, 3)
        else:
            # Weighted averages account for class imbalance; zero_division=0
            # avoids warnings when a class receives no predicted samples.
            class_report = classification_report(y,
                                                 y_hat,
                                                 output_dict=True,
                                                 zero_division=0)
            model_summary = class_report["weighted avg"]
            model_summary["accuracy"] = class_report["accuracy"]
            model_summary = {key: round(model_summary[key], 3)
                             for key in model_summary}
        self.model_summary = model_summary

    def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame:
        """Predict y_var/target variable.

        Parameters
        ----------
        df_predict : pd.DataFrame

            Pandas dataframe containing `x_var`.

        Returns
        -------
        pd.DataFrame

            Pandas dataframe containing predicted `y_var` and `x_var`.

        """
        df_op = df_predict.copy(deep=True)
        # Align the one hot encoded columns of the prediction data with the
        # training schema; dummy categories absent from `df_predict` are
        # filled with zeros before scaling so the model sees the same columns
        # in the same order as during fitting.
        df_predict = pd.get_dummies(df_predict)
        df_predict_tmp = pd.DataFrame(columns=self.x_var)
        df_predict = pd.concat([df_predict_tmp, df_predict])
        df_predict = df_predict.fillna(0)
        df_predict = pd.DataFrame(self.norm.transform(df_predict[self.x_var]))
        df_predict.columns = self.x_var
        y_hat = self.model.predict(df_predict)
        df_op.insert(loc=0, column=self.y_var, value=y_hat)
        return df_op
@@ -108,12 +108,12 @@ class GLMNet(): def __init__(self, df: pd.DataFrame, - y_var: List[str], + y_var: str, x_var: List[str], strata: str = None, param: Dict = None): """Initialize variables for module ``GLMNet``.""" - self.df = df[y_var + x_var] + self.df = df[[y_var] + x_var] self.y_var = y_var self.x_var = x_var self.strata = strata @@ -137,7 +137,7 @@ def _fit(self) -> None: """Fit the best GLMNet model.""" train_x, test_x,\ train_y, test_y = split(self.df[self.x_var], - self.df[self.y_var], + self.df[[self.y_var]], test_size=self.param["test_perc"], random_state=self.param["seed"], stratify=self.strata) @@ -161,7 +161,7 @@ def _fit(self) -> None: def _compute_metrics(self): """Compute commonly used metrics to evaluate the model.""" - y = self.df[self.y_var].iloc[:, 0].values.tolist() + y = self.df[[self.y_var]].iloc[:, 0].values.tolist() y_hat = list(self.predict(self.df[self.x_var])["y"].values) model_summary = {"rsq": np.round(metrics.rsq(y, y_hat), 3), "mae": np.round(metrics.mae(y, y_hat), 3), @@ -187,6 +187,5 @@ def predict(self, df_predict: pd.DataFrame) -> pd.DataFrame: """ y_hat = self.model.predict(df_predict) - df_predict = df_predict.copy() - df_predict["y"] = y_hat + df_predict.insert(loc=0, column=self.y_var, value=y_hat) return df_predict diff --git a/requirements.txt b/requirements.txt index b593d70..ef333fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ numpy==1.19.5 -Cython==0.29.15 pandas==1.1.3 +Cython==0.29.15 scikit_learn==1.0 diff --git a/tests/test_cluster.py b/tests/test_cluster.py index efd74f4..f14e30b 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -44,7 +44,7 @@ def ignore_warnings(test_func): - """Suppress deprecation warnings of pulp.""" + """Suppress warnings.""" def do_test(self, *args, **kwargs): with warnings.catch_warnings(): diff --git a/tests/test_knn.py b/tests/test_knn.py new file mode 100644 index 0000000..b244270 --- /dev/null +++ b/tests/test_knn.py @@ -0,0 +1,118 @@ +""" +Test 
def ignore_warnings(test_func):
    """Suppress warnings raised inside the decorated test."""

    def do_test(self, *args, **kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            test_func(self, *args, **kwargs)
    return do_test


class Test_Knn(unittest.TestCase):
    """Test suite for module ``KNN``."""

    def setUp(self):
        """Set up for module ``KNN``."""

    def test_knn_class(self):
        """KNN: Test for classification."""
        x_var = ["x1", "x2"]
        y_var = "y"
        df_ip = pd.read_csv(path + "iris.csv")
        df_ip = df_ip[[y_var] + x_var]
        df_train, df_test = split(df_ip,
                                  stratify=df_ip[y_var],
                                  test_size=0.1,
                                  random_state=42)
        mod = KNN(df_train, y_var, x_var, method="classify")
        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
        y = df_test[y_var].values.tolist()
        acc = round(sk_metrics.accuracy_score(y, y_hat), 2)
        self.assertGreaterEqual(acc, 0.93)

    @ignore_warnings
    def test_knn_reg(self):
        """KNN: Test for regression."""
        x_var = ["x1", "x2"]
        y_var = "y"
        df_ip = pd.read_csv(path + "iris.csv")
        df_ip = df_ip[[y_var] + x_var]
        df_train, df_test = split(df_ip,
                                  stratify=df_ip[y_var],
                                  test_size=0.1,
                                  random_state=42)
        mod = KNN(df_train, y_var, x_var, method="regression")
        y_hat = mod.predict(df_test[x_var])[y_var].tolist()
        y = df_test[y_var].values.tolist()
        # This is a mean squared error bound, not an accuracy score
        # (the variable was previously misnamed ``acc``).
        mse = round(sk_metrics.mean_squared_error(y, y_hat), 2)
        self.assertLessEqual(mse, 0.1)

    def test_knn_cat(self):
        """KNN: Test for one-hot encoding in prediction."""
        x_var = ["x1", "x2"]
        y_var = "y"
        df_ip = pd.read_csv(path + "iris.csv")
        df_ip = df_ip[[y_var] + x_var]
        df_train = df_ip.iloc[1:140]
        df_predict = df_ip.iloc[145:150, 1:]
        mod = KNN(df_train, y_var, x_var, method="classify")
        df_predict_columns = mod.predict(df_predict).columns.tolist()
        # Drop the predicted `y_var` column inserted at position 0.
        df_predict_columns.pop(0)
        # assertListEqual asserts the intended column equality; the previous
        # assertGreaterEqual compared the lists lexicographically, which
        # could pass even when the column sets differed.
        self.assertListEqual(mod.x_var, df_predict_columns)


# =============================================================================
# --- Main
# =============================================================================

if __name__ == '__main__':
    unittest.main()
x_var=["x1", "x2", "x3"]) op = mod.opt self.assertEqual(np.round(op.get('intercept'), 0), 100.0) @@ -76,7 +76,7 @@ def test_predict_target_variable(self): """GLMNet: Test to predict a target variable.""" df_ip = pd.read_csv(path + "test_glmnet.csv") mod = GLMNet(df=df_ip, - y_var=["y"], + y_var="y", x_var=["x1", "x2", "x3"]) df_predict = pd.DataFrame({"x1": [10, 20], "x2": [5, 10],