Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
combined feature_pipeline + estimator Pipeline
- Can pass a feature pipeline to `Dataset.plot` methods, to apply preprocessing
before visualization
- New config implementation. If you need to reset the configuration, you should use `Model.config.reset_config()`

# v0.10.3
- Fixed typehints in Dataset
Expand Down
47 changes: 16 additions & 31 deletions docs/config.inc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,56 +4,41 @@ Config
------
All configuration options available

:class: ml_tooling.config.DefaultConfig
.. autoclass:: ml_tooling.config.DefaultConfig


:attr:`VERBOSITY` = 0
The level of verbosity from output


:attr:`CLASSIFIER_METRIC` = 'accuracy'

Default metric for classifiers
Default metric for classifiers

:attr:`REGRESSION_METRIC` = 'r2'

Default metric for regressions
Default metric for regressions

:attr:`CROSS_VALIDATION` = 10

Default Number of cross validation folds to use

:attr:`STYLE_SHEET` = 'almbrand.mplstyle'

Default style sheet to use for plots
Default Number of cross validation folds to use

:attr:`N_JOBS` = -1

Default number of cores to use when doing multiprocessing.
-1 means use all available
Default number of cores to use when doing multiprocessing.
-1 means use all available

:attr:`RANDOM_STATE` = 42

Default random state seed for all functions involving randomness
Default random state seed for all functions involving randomness

:attr:`RUN_DIR` = './runs'
Default folder to store run logging files

Default folder to store run logging files

:attr:`MODEL_DIR` = './models'

Default folder to store pickled models in
:attr:`ESTIMATOR_DIR` = './models'
Default folder to store pickled models in

:attr:`LOG` = False
Toggles whether or not to log runs to a file. Set to True if you
want every run to be logged, else use the :meth:`~ml_tooling.baseclass.ModelData.log`
context manager

Toggles whether or not to log runs to a file. Set to True if you
want every run to be logged, else use the :meth:`~ml_tooling.baseclass.ModelData.log`
context manager

:attr:`SHUFFLE` = True

Default whether or not to shuffle data for test set
:attr:`TRAIN_TEST_SHUFFLE` = True
Default whether or not to shuffle data for test set

:attr:`TEST_SIZE` = 0.25

Default percentage of data that will be part of the test set
Default percentage of data that will be part of the test set
22 changes: 6 additions & 16 deletions src/ml_tooling/baseclass.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import datetime
import pathlib
import joblib
import pandas as pd
from contextlib import contextmanager
from importlib.resources import path as import_path
from typing import Tuple, Optional, Sequence, Union, List, Iterable, Any

import joblib
import pandas as pd
from sklearn.base import is_classifier, is_regressor
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import check_cv
from sklearn.pipeline import Pipeline

from ml_tooling.config import DefaultConfig, ConfigGetter
from ml_tooling.config import config
from ml_tooling.data.base_data import Dataset
from ml_tooling.logging.logger import create_logger
from ml_tooling.metrics.metric import Metrics
Expand Down Expand Up @@ -40,9 +41,6 @@ class Model:
Wrapper class for Estimators
"""

_config = None
config = ConfigGetter()

def __init__(self, estimator: Estimator, feature_pipeline: Pipeline = None):
"""
Parameters
Expand All @@ -57,6 +55,7 @@ def __init__(self, estimator: Estimator, feature_pipeline: Pipeline = None):
self._estimator: Estimator = _validate_estimator(estimator)
self.feature_pipeline = feature_pipeline
self.result: Optional[ResultType] = None
self.config = config

@property
def estimator(self):
Expand Down Expand Up @@ -437,7 +436,7 @@ def score_estimator(
if not data.has_validation_set:
data.create_train_test(
stratify=self.is_classifier,
shuffle=self.config.SHUFFLE,
shuffle=self.config.TRAIN_TEST_SHUFFLE,
test_size=self.config.TEST_SIZE,
seed=self.config.RANDOM_STATE,
)
Expand Down Expand Up @@ -665,15 +664,6 @@ def load_production_estimator(cls, module_name: str):
estimator = joblib.load(path)
return cls(estimator)

@classmethod
def reset_config(cls):
    """
    Reset configuration to default.

    Replaces the cached class-level config with a fresh
    :class:`DefaultConfig` and returns the class so the call can be chained.
    """
    # Dropping the cache is enough: the ConfigGetter descriptor rebuilds
    # from this attribute on next access.
    cls._config = DefaultConfig()

    return cls

def __repr__(self):
return f"<Model: {self.estimator_name}>"

Expand Down
49 changes: 28 additions & 21 deletions src/ml_tooling/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,43 @@

class DefaultConfig:
"""
Configuration for a given BaseClass. Configs propagate through each instance
Configuration for Models
"""

# Canonical default values for every configuration attribute;
# _set_config copies these onto the instance on init and on reset.
default_config = {
    "VERBOSITY": 0,
    "CLASSIFIER_METRIC": "accuracy",
    "REGRESSION_METRIC": "r2",
    "CROSS_VALIDATION": 10,
    "N_JOBS": -1,
    "RANDOM_STATE": 42,
    "TRAIN_TEST_SHUFFLE": True,
    "TEST_SIZE": 0.25,
}

def __init__(self):
self.VERBOSITY = 0
self.CLASSIFIER_METRIC = "accuracy"
self.REGRESSION_METRIC = "r2"
self.CROSS_VALIDATION = 10
self.STYLE_SHEET = MPL_STYLESHEET
self.N_JOBS = -1
self.RANDOM_STATE = 42
self._set_config()
self.LOG = False
self.RUN_DIR = RUN_DIR
self.ESTIMATOR_DIR = ESTIMATOR_DIR
self.LOG = False
self.SHUFFLE = True
self.TEST_SIZE = 0.25

def _set_config(self):
    """Copy every entry of ``default_config`` onto the instance.

    Called from ``__init__`` and ``reset_config`` so both share one
    source of truth for default values.
    """
    for option, default in self.default_config.items():
        setattr(self, option, default)

@property
def default_storage(self):
    """FileStorage rooted at :attr:`ESTIMATOR_DIR`.

    Presumably the default location for persisting estimators —
    confirm against FileStorage's definition, which is not visible here.
    """
    return FileStorage(self.ESTIMATOR_DIR)

def reset_config(self):
    """Reset every configuration attribute to its default value."""
    self._set_config()

def __repr__(self):
attrs = "\n".join(
[
Expand All @@ -44,13 +60,4 @@ def __repr__(self):
return f"<Config: \n{attrs}\n>"


class ConfigGetter:
    """
    Descriptor giving each class that inherits from Model an individual
    config attribute, without relying on the user to override the config
    when they define their class.
    """

    def __get__(self, obj, cls):
        # Lazily create one DefaultConfig per class on first access.
        if cls._config is None:
            cls._config = DefaultConfig()
        return cls._config
# Shared module-level config instance, imported by baseclass and the viz modules.
config = DefaultConfig()
35 changes: 17 additions & 18 deletions src/ml_tooling/plots/viz/baseviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
)
from ml_tooling.utils import _get_estimator_name
from sklearn.base import is_classifier
from ml_tooling.config import MPL_STYLESHEET, config


class BaseVisualize:
"""
Base class for visualizers
"""

def __init__(self, estimator, data, config):
def __init__(self, estimator, data):
self._estimator = estimator
self._estimator_name = _get_estimator_name(estimator)
self._config = config
self._data = data

@property
Expand All @@ -39,9 +39,9 @@ def default_metric(self):
"""

return (
self._config.CLASSIFIER_METRIC
config.CLASSIFIER_METRIC
if is_classifier(self._estimator)
else self._config.REGRESSION_METRIC
else config.REGRESSION_METRIC
)

def feature_importance(
Expand All @@ -52,7 +52,6 @@ def feature_importance(
bottom_n: Union[int, float] = None,
add_label: bool = True,
n_jobs: int = None,
random_state: int = None,
ax: Axes = None,
**kwargs,
) -> Axes:
Expand Down Expand Up @@ -83,9 +82,6 @@ def feature_importance(
Overwrites N_JOBS from settings. Useful if data is to big to fit
in memory multiple times.

random_state: int
Random state to be used when permuting features,

ax: Axes
Draws graph on passed ax - otherwise creates new ax

Expand All @@ -97,19 +93,19 @@ def feature_importance(
matplotlib.Axes
"""

n_jobs = self._config.N_JOBS if n_jobs is None else n_jobs
n_jobs = config.N_JOBS if n_jobs is None else n_jobs
scoring = self.default_metric if scoring == "default" else scoring
title = f"Feature Importances ({scoring.title()}) - {self._estimator_name}"

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
return plot_feature_importance(
estimator=self._estimator,
x=self._data.x,
y=self._data.y,
scoring=scoring,
n_repeats=n_repeats,
n_jobs=n_jobs,
random_state=random_state,
random_state=config.RANDOM_STATE,
ax=ax,
top_n=top_n,
bottom_n=bottom_n,
Expand All @@ -120,7 +116,7 @@ def feature_importance(

def learning_curve(
self,
cv: int = 5,
cv: int = None,
scoring: str = "default",
n_jobs: int = None,
train_sizes: Sequence[float] = np.linspace(0.1, 1.0, 5),
Expand Down Expand Up @@ -157,9 +153,10 @@ def learning_curve(
"""

title = f"Learning Curve - {self._estimator_name}"
n_jobs = self._config.N_JOBS if n_jobs is None else n_jobs
n_jobs = config.N_JOBS if n_jobs is None else n_jobs
cv = config.CROSS_VALIDATION if cv is None else cv

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
ax = plot_learning_curve(
estimator=self._estimator,
x=self._data.train_x,
Expand All @@ -180,7 +177,7 @@ def validation_curve(
param_name: str,
param_range: Sequence,
n_jobs: int = None,
cv: int = 5,
cv: int = None,
scoring: str = "default",
ax: Axes = None,
**kwargs,
Expand All @@ -205,7 +202,8 @@ def validation_curve(
Number of jobs to use in parallelizing the estimator fitting and scoring

cv: int
Number of CV iterations to run. Uses a :class:`~sklearn.model_selection.StratifiedKFold`
Number of CV iterations to run. Defaults to value in `Model.config`.
Uses a :class:`~sklearn.model_selection.StratifiedKFold`
if `estimator` is a classifier - otherwise a :class:`~sklearn.model_selection.KFold`
is used.

Expand All @@ -224,10 +222,11 @@ def validation_curve(
plt.Axes

"""
n_jobs = self._config.N_JOBS if n_jobs is None else n_jobs
n_jobs = config.N_JOBS if n_jobs is None else n_jobs
cv = config.CROSS_VALIDATION if cv is None else cv
title = f"Validation Curve - {self._estimator_name}"

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
ax = plot_validation_curve(
self._estimator,
x=self._data.train_x,
Expand Down
9 changes: 5 additions & 4 deletions src/ml_tooling/plots/viz/classification_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
)
from ml_tooling.utils import VizError, _classify
from ml_tooling.plots.viz.baseviz import BaseVisualize
from ml_tooling.config import MPL_STYLESHEET


class ClassificationVisualize(BaseVisualize):
Expand Down Expand Up @@ -35,7 +36,7 @@ def confusion_matrix(
matplotlib.Axes
"""

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
title = f"Confusion Matrix - {self._estimator_name}"
y_pred = _classify(self._data.test_x, self._estimator, threshold=threshold)
return plot_confusion_matrix(
Expand All @@ -55,7 +56,7 @@ def roc_curve(self, **kwargs) -> plt.Axes:
if not hasattr(self._estimator, "predict_proba"):
raise VizError("Model must provide a 'predict_proba' method")

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
title = f"ROC AUC - {self._estimator_name}"
y_proba = self._estimator.predict_proba(self._data.test_x)[:, 1]
return plot_roc_auc(self._data.test_y, y_proba, title=title, **kwargs)
Expand All @@ -70,7 +71,7 @@ def lift_curve(self, **kwargs) -> plt.Axes:
-------
matplotlib.Axes
"""
with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
title = f"Lift Curve - {self._estimator_name}"
y_proba = self._estimator.predict_proba(self._data.test_x)[:, 1]
return plot_lift_curve(self._data.test_y, y_proba, title=title, **kwargs)
Expand All @@ -95,7 +96,7 @@ def pr_curve(self, **kwargs) -> plt.Axes:
if not hasattr(self._estimator, "predict_proba"):
raise VizError("Estimator must provide a 'predict_proba' method")

with plt.style.context(self._config.STYLE_SHEET):
with plt.style.context(MPL_STYLESHEET):
title = f"Precision-Recall - {self._estimator_name}"
y_proba = self._estimator.predict_proba(self._data.test_x)[:, 1]
return plot_pr_curve(self._data.test_y, y_proba, title=title, **kwargs)
Loading