From 71cc926516f30791cba77522518e8e36193fa505 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sat, 3 Dec 2022 17:33:08 +0100
Subject: [PATCH 1/8] Trust all sklearn estimators by default in ObjectNode

---
 skops/io/_general.py         |  4 ++--
 skops/io/_trusted_types.py   |  8 ++++++++
 skops/io/tests/test_audit.py | 22 +++++++++++++---------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/skops/io/_general.py b/skops/io/_general.py
index 7f27fbcb..126bcfaf 100644
--- a/skops/io/_general.py
+++ b/skops/io/_general.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from ._audit import Node, get_tree
-from ._trusted_types import PRIMITIVE_TYPE_NAMES
+from ._trusted_types import PRIMITIVE_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES
 from ._utils import (
     LoadContext,
     SaveContext,
@@ -383,7 +383,7 @@ def __init__(
 
         self.children = {"attrs": attrs}
         # TODO: what do we trust?
-        self.trusted = self._get_trusted(trusted, [])
+        self.trusted = self._get_trusted(trusted, default=SKLEARN_ESTIMATOR_TYPE_NAMES)
 
     def _construct(self):
         cls = gettype(self.module_name, self.class_name)
diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py
index e3c38ffd..abc780bd 100644
--- a/skops/io/_trusted_types.py
+++ b/skops/io/_trusted_types.py
@@ -1,3 +1,11 @@
+from sklearn.utils import all_estimators
+
+from ._utils import get_type_name
+
 PRIMITIVES_TYPES = [int, float, str, bool]
 
 PRIMITIVE_TYPE_NAMES = ["builtins." + t.__name__ for t in PRIMITIVES_TYPES]
+
+SKLEARN_ESTIMATOR_TYPE_NAMES = [
+    get_type_name(estimator_class) for _, estimator_class in all_estimators()
+]
diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py
index a1ae0188..4b35ca85 100644
--- a/skops/io/tests/test_audit.py
+++ b/skops/io/tests/test_audit.py
@@ -146,6 +146,18 @@ def __init__(self):
     assert not hasattr(temp, "b")
 
 
+def test_sklearn_trusted_set():
+    clf = Pipeline(
+        [
+            ("scaler", StandardScaler()),
+            ("clf", LogisticRegression(random_state=0, solver="liblinear")),
+        ]
+    )
+
+    untrusted = get_untrusted_types(data=dumps(clf))
+    assert len(untrusted) == 0
+
+
 def test_complex_pipeline_untrusted_set():
     # fmt: off
     clf = Pipeline([
@@ -162,12 +174,4 @@ def test_complex_pipeline_untrusted_set():
 
     untrusted = get_untrusted_types(data=dumps(clf))
     type_names = [x.split(".")[-1] for x in untrusted]
-    assert type_names == [
-        "sqrt",
-        "square",
-        "LogisticRegression",
-        "FeatureUnion",
-        "Pipeline",
-        "StandardScaler",
-        "FunctionTransformer",
-    ]
+    assert type_names == ["sqrt", "square"]

From 0e600b81c78e3ce9ec547695c28789c29536eb4a Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sat, 3 Dec 2022 19:06:11 +0100
Subject: [PATCH 2/8] Add sanity check: trust only types under the sklearn namespace

---
 skops/io/_trusted_types.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py
index abc780bd..1ef1b826 100644
--- a/skops/io/_trusted_types.py
+++ b/skops/io/_trusted_types.py
@@ -7,5 +7,7 @@
 PRIMITIVE_TYPE_NAMES = ["builtins." + t.__name__ for t in PRIMITIVES_TYPES]
 
 SKLEARN_ESTIMATOR_TYPE_NAMES = [
-    get_type_name(estimator_class) for _, estimator_class in all_estimators()
+    get_type_name(estimator_class)
+    for _, estimator_class in all_estimators()
+    if get_type_name(estimator_class).startswith("sklearn.")
 ]

From d12564f12fb0484e4e52ee805e9aa9cf82625a10 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sun, 4 Dec 2022 10:55:13 +0100
Subject: [PATCH 3/8] testing all estimators in test_sklearn_trusted_types

---
 skops/io/tests/test_audit.py    |  19 +++--
 skops/io/tests/test_persist.py  | 134 ++----------------------------
 skops/io/tests/testing_utils.py | 139 ++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 138 deletions(-)
 create mode 100644 skops/io/tests/testing_utils.py

diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py
index 4b35ca85..be29e89e 100644
--- a/skops/io/tests/test_audit.py
+++ b/skops/io/tests/test_audit.py
@@ -9,11 +9,13 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import FeatureUnion, Pipeline
 from sklearn.preprocessing import FunctionTransformer, StandardScaler
+from sklearn.utils.estimator_checks import _get_check_estimator_ids
 
 from skops.io import dumps, get_untrusted_types
 from skops.io._audit import Node, audit_tree, check_type, get_tree, temp_setattr
 from skops.io._general import DictNode, dict_get_state
 from skops.io._utils import LoadContext, SaveContext, gettype
+from skops.io.tests.testing_utils import get_tested_estimators
 
 
 class CustomType:
@@ -146,16 +148,13 @@ def __init__(self):
     assert not hasattr(temp, "b")
 
 
-def test_sklearn_trusted_set():
-    clf = Pipeline(
-        [
-            ("scaler", StandardScaler()),
-            ("clf", LogisticRegression(random_state=0, solver="liblinear")),
-        ]
-    )
-
-    untrusted = get_untrusted_types(data=dumps(clf))
-    assert len(untrusted) == 0
+@pytest.mark.parametrize(
+    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
+)
+def test_sklearn_trusted_types(estimator):
+    untrusted_types = get_untrusted_types(data=dumps(estimator))
+    sklearn_untrusted_types = [t for t in untrusted_types if t.startswith("skelarn.")]
+    assert len(sklearn_untrusted_types) == 0
 
 
 def test_complex_pipeline_untrusted_set():
diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index f833620a..fbff2844 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -5,37 +5,24 @@
 import sys
 import warnings
 from collections import Counter
-from functools import partial, wraps
+from functools import wraps
 from pathlib import Path
 from zipfile import ZipFile
 
 import joblib
 import numpy as np
 import pytest
-from scipy import sparse, special
+from scipy import sparse
 from sklearn.base import BaseEstimator, is_regressor
-from sklearn.compose import ColumnTransformer
 from sklearn.datasets import load_sample_images, make_classification, make_regression
-from sklearn.decomposition import SparseCoder
 from sklearn.exceptions import SkipTestWarning
 from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import (
-    GridSearchCV,
-    GroupKFold,
-    HalvingGridSearchCV,
-    HalvingRandomSearchCV,
-    KFold,
-    RandomizedSearchCV,
-    ShuffleSplit,
-    check_cv,
-)
-from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.model_selection import GroupKFold, KFold, ShuffleSplit, check_cv
 from sklearn.pipeline import FeatureUnion, Pipeline
 from sklearn.preprocessing import (
     FunctionTransformer,
     MinMaxScaler,
-    Normalizer,
     PolynomialFeatures,
     StandardScaler,
 )
@@ -58,10 +45,7 @@
 from skops.io._sklearn import UNSUPPORTED_TYPES
 from skops.io._utils import LoadContext, SaveContext, _get_state, get_state
 from skops.io.exceptions import UnsupportedTypeException
-
-# Default settings for X
-N_SAMPLES = 50
-N_FEATURES = 20
+from skops.io.tests.testing_utils import N_FEATURES, N_SAMPLES, get_tested_estimators
 
 # TODO: Investigate why that seems to be an issue on MacOS (only observed with
 # Python 3.8)
@@ -122,112 +106,6 @@ def wrapper(state, load_context, trusted):
             NODE_TYPE_MAPPING[key] = debug_get_tree(method)
 
 
-def _tested_estimators(type_filter=None):
-    for name, Estimator in all_estimators(type_filter=type_filter):
-        if Estimator in UNSUPPORTED_TYPES:
-            continue
-        try:
-            # suppress warnings here for skipped estimators.
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore",
-                    category=SkipTestWarning,
-                    message="Can't instantiate estimator",
-                )
-                estimator = _construct_instance(Estimator)
-                # with the kind of data we pass, it needs to be 1 for the few
-                # estimators which have this.
-                if "n_components" in estimator.get_params():
-                    estimator.set_params(n_components=1)
-                    # Then n_best needs to be <= n_components
-                    if "n_best" in estimator.get_params():
-                        estimator.set_params(n_best=1)
-                if "patch_size" in estimator.get_params():
-                    # set patch size to fix PatchExtractor test.
-                    estimator.set_params(patch_size=(3, 3))
-        except SkipTest:
-            continue
-
-        yield estimator
-
-    # nested Pipeline & FeatureUnion
-    # fmt: off
-    yield Pipeline([
-        ("features", FeatureUnion([
-            ("scaler", StandardScaler()),
-            ("scaled-poly", Pipeline([
-                ("polys", FeatureUnion([
-                    ("poly1", PolynomialFeatures()),
-                    ("poly2", PolynomialFeatures(degree=3, include_bias=False))
-                ])),
-                ("scale", MinMaxScaler()),
-            ])),
-        ])),
-        ("clf", LogisticRegression(random_state=0, solver="liblinear")),
-    ])
-    # fmt: on
-
-    # FunctionTransformer with numpy functions
-    yield FunctionTransformer(
-        func=np.sqrt,
-        inverse_func=np.square,
-    )
-
-    # FunctionTransformer with scipy functions - problem is that they look like
-    # numpy ufuncs
-    yield FunctionTransformer(
-        func=special.erf,
-        inverse_func=special.erfinv,
-    )
-
-    # partial functions should be supported
-    yield FunctionTransformer(
-        func=partial(np.add, 10),
-        inverse_func=partial(np.add, -10),
-    )
-
-    yield KNeighborsClassifier(algorithm="kd_tree")
-    yield KNeighborsRegressor(algorithm="ball_tree")
-
-    yield ColumnTransformer(
-        [
-            ("norm1", Normalizer(norm="l1"), [0]),
-            ("norm2", Normalizer(norm="l1"), [1, 2]),
-            ("norm3", Normalizer(norm="l1"), [True] + (N_FEATURES - 1) * [False]),
-            ("norm4", Normalizer(norm="l1"), np.array([1, 2])),
-            ("norm5", Normalizer(norm="l1"), slice(3)),
-            ("norm6", Normalizer(norm="l1"), slice(-10, -3, 2)),
-        ],
-    )
-
-    yield GridSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield HalvingGridSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield HalvingRandomSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield RandomizedSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-        n_iter=3,
-    )
-
-    dictionary = np.random.randint(-2, 3, size=(5, N_FEATURES)).astype(float)
-    yield SparseCoder(
-        dictionary=dictionary,
-        transform_algorithm="lasso_lars",
-    )
-
-
 def _unsupported_estimators(type_filter=None):
     for name, Estimator in all_estimators(type_filter=type_filter):
         if Estimator not in UNSUPPORTED_TYPES:
@@ -398,7 +276,7 @@ def assert_params_equal(params1, params2):
 
 
 @pytest.mark.parametrize(
-    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
+    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
 )
 def test_can_persist_non_fitted(estimator):
     """Check that non-fitted estimators can be persisted."""
@@ -466,7 +344,7 @@ def get_input(estimator):
 
 
 @pytest.mark.parametrize(
-    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
+    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
 )
 def test_can_persist_fitted(estimator, request):
     """Check that fitted estimators can be persisted and return the right results."""
diff --git a/skops/io/tests/testing_utils.py b/skops/io/tests/testing_utils.py
new file mode 100644
index 00000000..48e66dd0
--- /dev/null
+++ b/skops/io/tests/testing_utils.py
@@ -0,0 +1,139 @@
+import warnings
+from functools import partial
+
+import numpy as np
+from scipy import special
+from sklearn.compose import ColumnTransformer
+from sklearn.decomposition import SparseCoder
+from sklearn.exceptions import SkipTestWarning
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import (
+    GridSearchCV,
+    HalvingGridSearchCV,
+    HalvingRandomSearchCV,
+    RandomizedSearchCV,
+)
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.pipeline import FeatureUnion, Pipeline
+from sklearn.preprocessing import (
+    FunctionTransformer,
+    MinMaxScaler,
+    Normalizer,
+    PolynomialFeatures,
+    StandardScaler,
+)
+from sklearn.utils import all_estimators
+from sklearn.utils._testing import SkipTest
+from sklearn.utils.estimator_checks import _construct_instance
+
+from skops.io._sklearn import UNSUPPORTED_TYPES
+
+# Default settings for X
+N_SAMPLES = 50
+N_FEATURES = 20
+
+
+def get_tested_estimators(type_filter=None):
+    for name, Estimator in all_estimators(type_filter=type_filter):
+        if Estimator in UNSUPPORTED_TYPES:
+            continue
+        try:
+            # suppress warnings here for skipped estimators.
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    category=SkipTestWarning,
+                    message="Can't instantiate estimator",
+                )
+                estimator = _construct_instance(Estimator)
+                # with the kind of data we pass, it needs to be 1 for the few
+                # estimators which have this.
+                if "n_components" in estimator.get_params():
+                    estimator.set_params(n_components=1)
+                    # Then n_best needs to be <= n_components
+                    if "n_best" in estimator.get_params():
+                        estimator.set_params(n_best=1)
+                if "patch_size" in estimator.get_params():
+                    # set patch size to fix PatchExtractor test.
+                    estimator.set_params(patch_size=(3, 3))
+        except SkipTest:
+            continue
+
+        yield estimator
+
+    # nested Pipeline & FeatureUnion
+    # fmt: off
+    yield Pipeline([
+        ("features", FeatureUnion([
+            ("scaler", StandardScaler()),
+            ("scaled-poly", Pipeline([
+                ("polys", FeatureUnion([
+                    ("poly1", PolynomialFeatures()),
+                    ("poly2", PolynomialFeatures(degree=3, include_bias=False))
+                ])),
+                ("scale", MinMaxScaler()),
+            ])),
+        ])),
+        ("clf", LogisticRegression(random_state=0, solver="liblinear")),
+    ])
+    # fmt: on
+
+    # FunctionTransformer with numpy functions
+    yield FunctionTransformer(
+        func=np.sqrt,
+        inverse_func=np.square,
+    )
+
+    # FunctionTransformer with scipy functions - problem is that they look like
+    # numpy ufuncs
+    yield FunctionTransformer(
+        func=special.erf,
+        inverse_func=special.erfinv,
+    )
+
+    # partial functions should be supported
+    yield FunctionTransformer(
+        func=partial(np.add, 10),
+        inverse_func=partial(np.add, -10),
+    )
+
+    yield KNeighborsClassifier(algorithm="kd_tree")
+    yield KNeighborsRegressor(algorithm="ball_tree")
+
+    yield ColumnTransformer(
+        [
+            ("norm1", Normalizer(norm="l1"), [0]),
+            ("norm2", Normalizer(norm="l1"), [1, 2]),
+            ("norm3", Normalizer(norm="l1"), [True] + (N_FEATURES - 1) * [False]),
+            ("norm4", Normalizer(norm="l1"), np.array([1, 2])),
+            ("norm5", Normalizer(norm="l1"), slice(3)),
+            ("norm6", Normalizer(norm="l1"), slice(-10, -3, 2)),
+        ],
+    )
+
+    yield GridSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield HalvingGridSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield HalvingRandomSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield RandomizedSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+        n_iter=3,
+    )
+
+    dictionary = np.random.randint(-2, 3, size=(5, N_FEATURES)).astype(float)
+    yield SparseCoder(
+        dictionary=dictionary,
+        transform_algorithm="lasso_lars",
+    )

From 4fde78242a47c9ccf1036dad85d6f9137390ce2e Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Mon, 5 Dec 2022 12:19:47 +0100
Subject: [PATCH 4/8] fixed typo in test

---
 skops/io/tests/test_audit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py
index be29e89e..ba631137 100644
--- a/skops/io/tests/test_audit.py
+++ b/skops/io/tests/test_audit.py
@@ -153,7 +153,7 @@ def __init__(self):
 )
 def test_sklearn_trusted_types(estimator):
     untrusted_types = get_untrusted_types(data=dumps(estimator))
-    sklearn_untrusted_types = [t for t in untrusted_types if t.startswith("skelarn.")]
+    sklearn_untrusted_types = [t for t in untrusted_types if t.startswith("sklearn.")]
     assert len(sklearn_untrusted_types) == 0
 
 

From 2a96fc55dd169cf87984c746f4677f680203121c Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 6 Dec 2022 22:57:11 +0100
Subject: [PATCH 5/8] moved sklearn_untrusted_type inside
 test_can_persist_fitted

---
 skops/io/tests/test_audit.py    |  11 ---
 skops/io/tests/test_persist.py  | 142 ++++++++++++++++++++++++++++++--
 skops/io/tests/testing_utils.py | 139 -------------------------------
 3 files changed, 135 insertions(+), 157 deletions(-)
 delete mode 100644 skops/io/tests/testing_utils.py

diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py
index ba631137..71914b4d 100644
--- a/skops/io/tests/test_audit.py
+++ b/skops/io/tests/test_audit.py
@@ -9,13 +9,11 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import FeatureUnion, Pipeline
 from sklearn.preprocessing import FunctionTransformer, StandardScaler
-from sklearn.utils.estimator_checks import _get_check_estimator_ids
 
 from skops.io import dumps, get_untrusted_types
 from skops.io._audit import Node, audit_tree, check_type, get_tree, temp_setattr
 from skops.io._general import DictNode, dict_get_state
 from skops.io._utils import LoadContext, SaveContext, gettype
-from skops.io.tests.testing_utils import get_tested_estimators
 
 
 class CustomType:
@@ -148,15 +146,6 @@ def __init__(self):
     assert not hasattr(temp, "b")
 
 
-@pytest.mark.parametrize(
-    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
-)
-def test_sklearn_trusted_types(estimator):
-    untrusted_types = get_untrusted_types(data=dumps(estimator))
-    sklearn_untrusted_types = [t for t in untrusted_types if t.startswith("sklearn.")]
-    assert len(sklearn_untrusted_types) == 0
-
-
 def test_complex_pipeline_untrusted_set():
     # fmt: off
     clf = Pipeline([
diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index 29dcff36..ce1c6ec9 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -5,24 +5,37 @@
 import sys
 import warnings
 from collections import Counter
-from functools import wraps
+from functools import partial, wraps
 from pathlib import Path
 from zipfile import ZipFile
 
 import joblib
 import numpy as np
 import pytest
-from scipy import sparse
+from scipy import sparse, special
 from sklearn.base import BaseEstimator, is_regressor
+from sklearn.compose import ColumnTransformer
 from sklearn.datasets import load_sample_images, make_classification, make_regression
+from sklearn.decomposition import SparseCoder
 from sklearn.exceptions import SkipTestWarning
 from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GroupKFold, KFold, ShuffleSplit, check_cv
+from sklearn.model_selection import (
+    GridSearchCV,
+    GroupKFold,
+    HalvingGridSearchCV,
+    HalvingRandomSearchCV,
+    KFold,
+    RandomizedSearchCV,
+    ShuffleSplit,
+    check_cv,
+)
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
 from sklearn.pipeline import FeatureUnion, Pipeline
 from sklearn.preprocessing import (
     FunctionTransformer,
     MinMaxScaler,
+    Normalizer,
     PolynomialFeatures,
     StandardScaler,
 )
@@ -45,7 +58,10 @@
 from skops.io._sklearn import UNSUPPORTED_TYPES
 from skops.io._utils import LoadContext, SaveContext, _get_state, get_state
 from skops.io.exceptions import UnsupportedTypeException
-from skops.io.tests.testing_utils import N_FEATURES, N_SAMPLES, get_tested_estimators
+
+# Default settings for X
+N_SAMPLES = 50
+N_FEATURES = 20
 
 # TODO: Investigate why that seems to be an issue on MacOS (only observed with
 # Python 3.8)
@@ -106,6 +122,112 @@ def wrapper(state, load_context, trusted):
             NODE_TYPE_MAPPING[key] = debug_get_tree(method)
 
 
+def _tested_estimators(type_filter=None):
+    for name, Estimator in all_estimators(type_filter=type_filter):
+        if Estimator in UNSUPPORTED_TYPES:
+            continue
+        try:
+            # suppress warnings here for skipped estimators.
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    category=SkipTestWarning,
+                    message="Can't instantiate estimator",
+                )
+                estimator = _construct_instance(Estimator)
+                # with the kind of data we pass, it needs to be 1 for the few
+                # estimators which have this.
+                if "n_components" in estimator.get_params():
+                    estimator.set_params(n_components=1)
+                    # Then n_best needs to be <= n_components
+                    if "n_best" in estimator.get_params():
+                        estimator.set_params(n_best=1)
+                if "patch_size" in estimator.get_params():
+                    # set patch size to fix PatchExtractor test.
+                    estimator.set_params(patch_size=(3, 3))
+        except SkipTest:
+            continue
+
+        yield estimator
+
+    # nested Pipeline & FeatureUnion
+    # fmt: off
+    yield Pipeline([
+        ("features", FeatureUnion([
+            ("scaler", StandardScaler()),
+            ("scaled-poly", Pipeline([
+                ("polys", FeatureUnion([
+                    ("poly1", PolynomialFeatures()),
+                    ("poly2", PolynomialFeatures(degree=3, include_bias=False))
+                ])),
+                ("scale", MinMaxScaler()),
+            ])),
+        ])),
+        ("clf", LogisticRegression(random_state=0, solver="liblinear")),
+    ])
+    # fmt: on
+
+    # FunctionTransformer with numpy functions
+    yield FunctionTransformer(
+        func=np.sqrt,
+        inverse_func=np.square,
+    )
+
+    # FunctionTransformer with scipy functions - problem is that they look like
+    # numpy ufuncs
+    yield FunctionTransformer(
+        func=special.erf,
+        inverse_func=special.erfinv,
+    )
+
+    # partial functions should be supported
+    yield FunctionTransformer(
+        func=partial(np.add, 10),
+        inverse_func=partial(np.add, -10),
+    )
+
+    yield KNeighborsClassifier(algorithm="kd_tree")
+    yield KNeighborsRegressor(algorithm="ball_tree")
+
+    yield ColumnTransformer(
+        [
+            ("norm1", Normalizer(norm="l1"), [0]),
+            ("norm2", Normalizer(norm="l1"), [1, 2]),
+            ("norm3", Normalizer(norm="l1"), [True] + (N_FEATURES - 1) * [False]),
+            ("norm4", Normalizer(norm="l1"), np.array([1, 2])),
+            ("norm5", Normalizer(norm="l1"), slice(3)),
+            ("norm6", Normalizer(norm="l1"), slice(-10, -3, 2)),
+        ],
+    )
+
+    yield GridSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield HalvingGridSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield HalvingRandomSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+    )
+
+    yield RandomizedSearchCV(
+        LogisticRegression(random_state=0, solver="liblinear"),
+        {"C": [1, 2, 3, 4, 5]},
+        n_iter=3,
+    )
+
+    dictionary = np.random.randint(-2, 3, size=(5, N_FEATURES)).astype(float)
+    yield SparseCoder(
+        dictionary=dictionary,
+        transform_algorithm="lasso_lars",
+    )
+
+
 def _unsupported_estimators(type_filter=None):
     for name, Estimator in all_estimators(type_filter=type_filter):
         if Estimator not in UNSUPPORTED_TYPES:
@@ -276,7 +398,7 @@ def assert_params_equal(params1, params2):
 
 
 @pytest.mark.parametrize(
-    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
+    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
 )
 def test_can_persist_non_fitted(estimator):
     """Check that non-fitted estimators can be persisted."""
@@ -344,9 +466,9 @@ def get_input(estimator):
 
 
 @pytest.mark.parametrize(
-    "estimator", get_tested_estimators(), ids=_get_check_estimator_ids
+    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
 )
-def test_can_persist_fitted(estimator, request):
+def test_can_persist_fitted(estimator):
     """Check that fitted estimators can be persisted and return the right results."""
     set_random_state(estimator, random_state=0)
 
@@ -369,6 +491,12 @@ def test_can_persist_fitted(estimator, request):
     loaded = loads(dumped, trusted=untrusted_types)
     assert_params_equal(estimator.__dict__, loaded.__dict__)
 
+    # test that most sklearn estimators are not in untrusted_types
+    sklearn_untrusted_types = [
+        type_ for type_ in untrusted_types if type_.startswith("sklearn.")
+    ]
+    assert len(sklearn_untrusted_types) == 0
+
     for method in [
         "predict",
         "predict_proba",
diff --git a/skops/io/tests/testing_utils.py b/skops/io/tests/testing_utils.py
deleted file mode 100644
index 48e66dd0..00000000
--- a/skops/io/tests/testing_utils.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import warnings
-from functools import partial
-
-import numpy as np
-from scipy import special
-from sklearn.compose import ColumnTransformer
-from sklearn.decomposition import SparseCoder
-from sklearn.exceptions import SkipTestWarning
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import (
-    GridSearchCV,
-    HalvingGridSearchCV,
-    HalvingRandomSearchCV,
-    RandomizedSearchCV,
-)
-from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-from sklearn.pipeline import FeatureUnion, Pipeline
-from sklearn.preprocessing import (
-    FunctionTransformer,
-    MinMaxScaler,
-    Normalizer,
-    PolynomialFeatures,
-    StandardScaler,
-)
-from sklearn.utils import all_estimators
-from sklearn.utils._testing import SkipTest
-from sklearn.utils.estimator_checks import _construct_instance
-
-from skops.io._sklearn import UNSUPPORTED_TYPES
-
-# Default settings for X
-N_SAMPLES = 50
-N_FEATURES = 20
-
-
-def get_tested_estimators(type_filter=None):
-    for name, Estimator in all_estimators(type_filter=type_filter):
-        if Estimator in UNSUPPORTED_TYPES:
-            continue
-        try:
-            # suppress warnings here for skipped estimators.
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore",
-                    category=SkipTestWarning,
-                    message="Can't instantiate estimator",
-                )
-                estimator = _construct_instance(Estimator)
-                # with the kind of data we pass, it needs to be 1 for the few
-                # estimators which have this.
-                if "n_components" in estimator.get_params():
-                    estimator.set_params(n_components=1)
-                    # Then n_best needs to be <= n_components
-                    if "n_best" in estimator.get_params():
-                        estimator.set_params(n_best=1)
-                if "patch_size" in estimator.get_params():
-                    # set patch size to fix PatchExtractor test.
-                    estimator.set_params(patch_size=(3, 3))
-        except SkipTest:
-            continue
-
-        yield estimator
-
-    # nested Pipeline & FeatureUnion
-    # fmt: off
-    yield Pipeline([
-        ("features", FeatureUnion([
-            ("scaler", StandardScaler()),
-            ("scaled-poly", Pipeline([
-                ("polys", FeatureUnion([
-                    ("poly1", PolynomialFeatures()),
-                    ("poly2", PolynomialFeatures(degree=3, include_bias=False))
-                ])),
-                ("scale", MinMaxScaler()),
-            ])),
-        ])),
-        ("clf", LogisticRegression(random_state=0, solver="liblinear")),
-    ])
-    # fmt: on
-
-    # FunctionTransformer with numpy functions
-    yield FunctionTransformer(
-        func=np.sqrt,
-        inverse_func=np.square,
-    )
-
-    # FunctionTransformer with scipy functions - problem is that they look like
-    # numpy ufuncs
-    yield FunctionTransformer(
-        func=special.erf,
-        inverse_func=special.erfinv,
-    )
-
-    # partial functions should be supported
-    yield FunctionTransformer(
-        func=partial(np.add, 10),
-        inverse_func=partial(np.add, -10),
-    )
-
-    yield KNeighborsClassifier(algorithm="kd_tree")
-    yield KNeighborsRegressor(algorithm="ball_tree")
-
-    yield ColumnTransformer(
-        [
-            ("norm1", Normalizer(norm="l1"), [0]),
-            ("norm2", Normalizer(norm="l1"), [1, 2]),
-            ("norm3", Normalizer(norm="l1"), [True] + (N_FEATURES - 1) * [False]),
-            ("norm4", Normalizer(norm="l1"), np.array([1, 2])),
-            ("norm5", Normalizer(norm="l1"), slice(3)),
-            ("norm6", Normalizer(norm="l1"), slice(-10, -3, 2)),
-        ],
-    )
-
-    yield GridSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield HalvingGridSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield HalvingRandomSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-    )
-
-    yield RandomizedSearchCV(
-        LogisticRegression(random_state=0, solver="liblinear"),
-        {"C": [1, 2, 3, 4, 5]},
-        n_iter=3,
-    )
-
-    dictionary = np.random.randint(-2, 3, size=(5, N_FEATURES)).astype(float)
-    yield SparseCoder(
-        dictionary=dictionary,
-        transform_algorithm="lasso_lars",
-    )

From 6e56ff63a179f4c3d393edc2ae5fa7a37f316da2 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 6 Dec 2022 23:22:52 +0100
Subject: [PATCH 6/8] excluded sklearn types in tests that are not yet trusted

---
 skops/io/tests/test_persist.py | 48 ++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index ce1c6ec9..2831cbeb 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -67,6 +67,47 @@
 # Python 3.8)
 ATOL = 1e-6 if sys.platform == "darwin" else 1e-7
 
+# TODO: remove when these are added to trusted types
+SKLEARN_EXCLUDED_TYPES = (
+    "sklearn._loss._loss.CyHalfBinomialLoss",
+    "sklearn._loss._loss.CyHalfGammaLoss",
+    "sklearn._loss._loss.CyHalfPoissonLoss",
+    "sklearn._loss._loss.CyHalfSquaredError",
+    "sklearn._loss._loss.CyHalfTweedieLossIdentity",
+    "sklearn._loss.link.IdentityLink",
+    "sklearn._loss.link.Interval",
+    "sklearn._loss.link.LogLink",
+    "sklearn._loss.link.LogitLink",
+    "sklearn._loss.loss.HalfBinomialLoss",
+    "sklearn._loss.loss.HalfGammaLoss",
+    "sklearn._loss.loss.HalfPoissonLoss",
+    "sklearn._loss.loss.HalfSquaredError",
+    "sklearn._loss.loss.HalfTweedieLossIdentity",
+    "sklearn.calibration._CalibratedClassifier",
+    "sklearn.calibration._SigmoidCalibration",
+    "sklearn.cluster._bisect_k_means._BisectingTree",
+    "sklearn.cluster._kmeans._kmeans_single_lloyd",
+    "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys",
+    "sklearn.ensemble._gb_losses.BinomialDeviance",
+    "sklearn.ensemble._gb_losses.LeastSquaresError",
+    "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper",
+    "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor",
+    "sklearn.feature_selection._univariate_selection.f_classif",
+    "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace",
+    "sklearn.gaussian_process.kernels.ConstantKernel",
+    "sklearn.gaussian_process.kernels.Product",
+    "sklearn.gaussian_process.kernels.RBF",
+    "sklearn.impute._iterative._ImputerTriplet",
+    "sklearn.metrics._dist_metrics.EuclideanDistance",
+    "sklearn.metrics._scorer._passthrough_scorer",
+    "sklearn.model_selection._split.StratifiedKFold",
+    "sklearn.multiclass._ConstantPredictor",
+    "sklearn.neighbors._ball_tree.BallTree",
+    "sklearn.neighbors._kd_tree.KDTree",
+    "sklearn.neural_network._stochastic_optimizers.AdamOptimizer",
+    "sklearn.utils._bunch.Bunch",
+)
+
 
 @pytest.fixture(autouse=True, scope="module")
 def debug_dispatch_functions():
@@ -491,9 +532,12 @@ def test_can_persist_fitted(estimator):
     loaded = loads(dumped, trusted=untrusted_types)
     assert_params_equal(estimator.__dict__, loaded.__dict__)
 
-    # test that most sklearn estimators are not in untrusted_types
+    # test that sklearn types are trusted. Some known types are excluded
+    # from testing because they are not in the trusted list yet.
     sklearn_untrusted_types = [
-        type_ for type_ in untrusted_types if type_.startswith("sklearn.")
+        type_
+        for type_ in untrusted_types
+        if type_.startswith("sklearn.") and type_ not in SKLEARN_EXCLUDED_TYPES
     ]
     assert len(sklearn_untrusted_types) == 0
 

From 1fedb2c259e48c7b453e3a4aa39a1a482457e74a Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Wed, 7 Dec 2022 22:30:59 +0100
Subject: [PATCH 7/8] adjusted test for SKLEARN_ESTIMATOR_TYPE_NAMES

---
 skops/io/tests/test_persist.py | 51 ++--------------------------------
 1 file changed, 2 insertions(+), 49 deletions(-)

diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index 2831cbeb..012ab7f7 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -56,6 +56,7 @@
 from skops.io import dump, dumps, get_untrusted_types, load, loads
 from skops.io._audit import NODE_TYPE_MAPPING, get_tree
 from skops.io._sklearn import UNSUPPORTED_TYPES
+from skops.io._trusted_types import SKLEARN_ESTIMATOR_TYPE_NAMES
 from skops.io._utils import LoadContext, SaveContext, _get_state, get_state
 from skops.io.exceptions import UnsupportedTypeException
 
@@ -67,47 +68,6 @@
 # Python 3.8)
 ATOL = 1e-6 if sys.platform == "darwin" else 1e-7
 
-# TODO: remove when these are added to trusted types
-SKLEARN_EXCLUDED_TYPES = (
-    "sklearn._loss._loss.CyHalfBinomialLoss",
-    "sklearn._loss._loss.CyHalfGammaLoss",
-    "sklearn._loss._loss.CyHalfPoissonLoss",
-    "sklearn._loss._loss.CyHalfSquaredError",
-    "sklearn._loss._loss.CyHalfTweedieLossIdentity",
-    "sklearn._loss.link.IdentityLink",
-    "sklearn._loss.link.Interval",
-    "sklearn._loss.link.LogLink",
-    "sklearn._loss.link.LogitLink",
-    "sklearn._loss.loss.HalfBinomialLoss",
-    "sklearn._loss.loss.HalfGammaLoss",
-    "sklearn._loss.loss.HalfPoissonLoss",
-    "sklearn._loss.loss.HalfSquaredError",
-    "sklearn._loss.loss.HalfTweedieLossIdentity",
-    "sklearn.calibration._CalibratedClassifier",
-    "sklearn.calibration._SigmoidCalibration",
-    "sklearn.cluster._bisect_k_means._BisectingTree",
-    "sklearn.cluster._kmeans._kmeans_single_lloyd",
-    "sklearn.covariance._graph_lasso._DictWithDeprecatedKeys",
-    "sklearn.ensemble._gb_losses.BinomialDeviance",
-    "sklearn.ensemble._gb_losses.LeastSquaresError",
-    "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper",
-    "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor",
-    "sklearn.feature_selection._univariate_selection.f_classif",
-    "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace",
-    "sklearn.gaussian_process.kernels.ConstantKernel",
-    "sklearn.gaussian_process.kernels.Product",
-    "sklearn.gaussian_process.kernels.RBF",
-    "sklearn.impute._iterative._ImputerTriplet",
-    "sklearn.metrics._dist_metrics.EuclideanDistance",
-    "sklearn.metrics._scorer._passthrough_scorer",
-    "sklearn.model_selection._split.StratifiedKFold",
-    "sklearn.multiclass._ConstantPredictor",
-    "sklearn.neighbors._ball_tree.BallTree",
-    "sklearn.neighbors._kd_tree.KDTree",
-    "sklearn.neural_network._stochastic_optimizers.AdamOptimizer",
-    "sklearn.utils._bunch.Bunch",
-)
-
 
 @pytest.fixture(autouse=True, scope="module")
 def debug_dispatch_functions():
@@ -532,14 +492,7 @@ def test_can_persist_fitted(estimator):
     loaded = loads(dumped, trusted=untrusted_types)
     assert_params_equal(estimator.__dict__, loaded.__dict__)
 
-    # test that sklearn types are trusted. Some known types are excluded
-    # from testing because they are not in the trusted list yet.
-    sklearn_untrusted_types = [
-        type_
-        for type_ in untrusted_types
-        if type_.startswith("sklearn.") and type_ not in SKLEARN_EXCLUDED_TYPES
-    ]
-    assert len(sklearn_untrusted_types) == 0
+    assert not any(type_ in SKLEARN_ESTIMATOR_TYPE_NAMES for type_ in untrusted_types)
 
     for method in [
         "predict",

From df2cc5a3bb2d3a92db9bcfc7023223a4ab3e4081 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sun, 11 Dec 2022 18:43:59 +0100
Subject: [PATCH 8/8] updated changelog

---
 docs/changes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/changes.rst b/docs/changes.rst
index 034074a4..bea77aab 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -14,6 +14,8 @@ v0.4
 - :func:`.io.dump` and :func:`.io.load` now work with file like objects,
   which means you can use them with the ``with open(...) as f: dump(obj, f)``
   pattern, like you'd do with ``pickle``. :pr:`234` by `Benjamin Bossan`_.
+- All ``scikit-learn`` estimators are trusted by default.
+  :pr:`237` by :user:`Edoardo Abati <EdAbati>`.
 
 v0.3
 ----