From 89561a07856a38abf62cfa258d739ce50f9cfc5c Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sat, 1 Apr 2023 13:47:30 +0200 Subject: [PATCH 1/9] #224: trusting np.ufuncs and np.dtypes --- skops/io/_general.py | 10 ++++++++-- skops/io/_numpy.py | 3 ++- skops/io/_trusted_types.py | 22 ++++++++++++++++++++++ skops/io/tests/test_persist.py | 32 ++++++++++++++++++++++++++++---- 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/skops/io/_general.py b/skops/io/_general.py index 5a5f3479..13fa72f3 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -13,6 +13,8 @@ from ._audit import Node, get_tree from ._protocol import PROTOCOL from ._trusted_types import ( + NUMPY_DTYPE_TYPE_NAMES, + NUMPY_UFUNC_TYPE_NAMES, PRIMITIVE_TYPE_NAMES, SCIPY_UFUNC_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES, @@ -197,7 +199,9 @@ def __init__( ) -> None: super().__init__(state, load_context, trusted) # TODO: what do we trust? - self.trusted = self._get_trusted(trusted, default=SCIPY_UFUNC_TYPE_NAMES) + self.trusted = self._get_trusted( + trusted, default=SCIPY_UFUNC_TYPE_NAMES + NUMPY_UFUNC_TYPE_NAMES + ) self.children = {} def _construct(self): @@ -278,7 +282,9 @@ def __init__( ) -> None: super().__init__(state, load_context, trusted) # TODO: what do we trust? - self.trusted = self._get_trusted(trusted, PRIMITIVE_TYPE_NAMES) + self.trusted = self._get_trusted( + trusted, PRIMITIVE_TYPE_NAMES + NUMPY_DTYPE_TYPE_NAMES + ) # We use a bare Node type here since a Node only checks the type in the # dict using __class__ and __module__ keys. self.children = {} diff --git a/skops/io/_numpy.py b/skops/io/_numpy.py index d5243f15..4b52ad4c 100644 --- a/skops/io/_numpy.py +++ b/skops/io/_numpy.py @@ -8,6 +8,7 @@ from ._audit import Node, get_tree from ._general import function_get_state from ._protocol import PROTOCOL +from ._trusted_types import NUMPY_DTYPE_TYPE_NAMES from ._utils import LoadContext, SaveContext, get_module, get_state, gettype from .exceptions import UnsupportedTypeException @@ -60,7 +61,7 @@ def __init__( ) -> None: super().__init__(state, load_context, trusted) self.type = state["type"] - self.trusted = self._get_trusted(trusted, [np.ndarray]) + self.trusted = self._get_trusted(trusted, [np.ndarray] + NUMPY_DTYPE_TYPE_NAMES) if self.type == "numpy": self.children = { "content": io.BytesIO(load_context.src.read(state["file"])) diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index 39e55573..95104dd9 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -24,3 +24,25 @@ ] ) ) + +NUMPY_UFUNC_TYPE_NAMES = sorted( + set( + [ + get_type_name(getattr(np, attr)) + for attr in dir(np) + if isinstance(getattr(np, attr), np.ufunc) + and get_type_name(getattr(np, attr)).startswith("numpy") + ] + ) +) + +NUMPY_DTYPE_TYPE_NAMES = sorted( + set( + [ + get_type_name(dtype) + for dtypes in np.sctypes.values() + for dtype in dtypes + if get_type_name(dtype).startswith("numpy") + ] + ) +) diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index 1724a4c1..1139ac93 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -53,7 +53,13 @@ from skops.io import dump, dumps, get_untrusted_types, load, loads from skops.io._audit import NODE_TYPE_MAPPING, get_tree from skops.io._sklearn import UNSUPPORTED_TYPES -from skops.io._trusted_types import SCIPY_UFUNC_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES +from skops.io._trusted_types import ( + NUMPY_DTYPE_TYPE_NAMES, + NUMPY_UFUNC_TYPE_NAMES, + PRIMITIVE_TYPE_NAMES, + SCIPY_UFUNC_TYPE_NAMES, + SKLEARN_ESTIMATOR_TYPE_NAMES, +) from skops.io._utils import LoadContext, SaveContext, _get_state, get_state, gettype from skops.io.exceptions import UnsupportedTypeException, UntrustedTypesFoundException from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal @@ -224,11 +230,17 @@ def _tested_estimators(type_filter=None): def _tested_ufuncs(): - for full_name in SCIPY_UFUNC_TYPE_NAMES: + for full_name in SCIPY_UFUNC_TYPE_NAMES + NUMPY_UFUNC_TYPE_NAMES: module_name, _, ufunc_name = full_name.rpartition(".") yield gettype(module_name=module_name, cls_or_func=ufunc_name) +def _tested_types(): + for full_name in PRIMITIVE_TYPE_NAMES + NUMPY_DTYPE_TYPE_NAMES: + module_name, _, type_name = full_name.rpartition(".") + yield gettype(module_name=module_name, cls_or_func=type_name) + + def _unsupported_estimators(type_filter=None): for name, Estimator in all_estimators(type_filter=type_filter): if Estimator not in UNSUPPORTED_TYPES: @@ -356,15 +368,27 @@ def test_can_persist_fitted(estimator): assert not any(type_ in SKLEARN_ESTIMATOR_TYPE_NAMES for type_ in untrusted_types) assert not any(type_ in SCIPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) + assert not any(type_ in NUMPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) + assert not any(type_ in NUMPY_DTYPE_TYPE_NAMES for type_ in untrusted_types) assert_method_outputs_equal(estimator, loaded, X) -@pytest.mark.parametrize("ufunc", _tested_ufuncs(), ids=SCIPY_UFUNC_TYPE_NAMES) +@pytest.mark.parametrize( + "ufunc", _tested_ufuncs(), ids=SCIPY_UFUNC_TYPE_NAMES + NUMPY_UFUNC_TYPE_NAMES +) def test_can_trust_ufuncs(ufunc): dumped = dumps(ufunc) untrusted_types = get_untrusted_types(data=dumped) assert len(untrusted_types) == 0 - # TODO: extend with numpy ufuncs + + +@pytest.mark.parametrize( + "type_", _tested_types(), ids=PRIMITIVE_TYPE_NAMES + NUMPY_DTYPE_TYPE_NAMES +) +def test_can_trust_types(type_): + dumped = dumps(type_) + untrusted_types = get_untrusted_types(data=dumped) + assert len(untrusted_types) == 0 @pytest.mark.parametrize( From 22b0043456fc8abf343ca507a8e8c4a0bfd09764 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sat, 1 Apr 2023 13:58:39 +0200 Subject: [PATCH 2/9] #224: updating changes.rst --- docs/changes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changes.rst b/docs/changes.rst index 95c446f3..bf2e627b 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -31,6 +31,8 @@ v0.6 - Fix issue with persisting :class:`numpy.random.Generator` using the skops format (the object could be loaded correctly but security could not be checked). :pr:`331` by `Benjamin Bossan`_. +- All public ``numpy`` ufuncs (Universal Functions) and dtypes are trusted by default + by :func:`.io.load`. :pr:`336` by :user:`Omar Arab Oghli `. v0.5 ---- From 3fc51160e7b95ddd990d491260ee13904d209b8c Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sat, 1 Apr 2023 14:15:02 +0200 Subject: [PATCH 3/9] #224: fixing failing tests. --- skops/io/tests/test_audit.py | 10 ++++++---- skops/io/tests/test_visualize.py | 8 ++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py index 8bd68867..41b5840f 100644 --- a/skops/io/tests/test_audit.py +++ b/skops/io/tests/test_audit.py @@ -151,9 +151,9 @@ def test_complex_pipeline_untrusted_set(): clf = Pipeline([ ("features", FeatureUnion([ ("scaler", StandardScaler()), - ("sqrt", FunctionTransformer( - func=np.sqrt, - inverse_func=np.square, + ("np.funcs", FunctionTransformer( + func=np.split, + inverse_func=np.angle, )), ])), ("clf", LogisticRegression(random_state=0, solver="liblinear")), @@ -162,7 +162,9 @@ def test_complex_pipeline_untrusted_set(): untrusted = get_untrusted_types(data=dumps(clf)) type_names = [x.split(".")[-1] for x in untrusted] - assert type_names == ["sqrt", "square"] + + # choosing random numpy functions that are yet not considered as default trusted ones. + assert type_names == ["angle", "split"] def test_format_object_node(): diff --git a/skops/io/tests/test_visualize.py b/skops/io/tests/test_visualize.py index 91dab864..f29e7260 100644 --- a/skops/io/tests/test_visualize.py +++ b/skops/io/tests/test_visualize.py @@ -101,12 +101,8 @@ def sink(nodes_iter, *args, **kwargs): nodes_self_unsafe = [node for node in nodes if not node.is_self_safe] nodes_unsafe = [node for node in nodes if not node.is_safe] - # there are currently 2 unsafe nodes, a numpy int and the custom - # functions. The former might be considered safe in the future, in which - # case this test needs to be changed. - assert len(nodes_self_unsafe) == 2 - assert nodes_self_unsafe[0].val == "numpy.int64" - assert nodes_self_unsafe[1].val == "test_visualize.unsafe_function" + assert len(nodes_self_unsafe) == 1 + assert nodes_self_unsafe[0].val == "test_visualize.unsafe_function" # it's not easy to test the number of indirectly unsafe nodes, because # it will depend on the nesting structure; we can only be sure that it's From 3ad5f78e1f92dc5366a7397320159302296da142 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sat, 1 Apr 2023 14:54:53 +0200 Subject: [PATCH 4/9] #224: ignoring mypy errors. Should be safe as long as np.sctypes has the same type. --- skops/io/_numpy.py | 4 +++- skops/io/_trusted_types.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/skops/io/_numpy.py b/skops/io/_numpy.py index 4b52ad4c..4724560c 100644 --- a/skops/io/_numpy.py +++ b/skops/io/_numpy.py @@ -61,7 +61,9 @@ def __init__( ) -> None: super().__init__(state, load_context, trusted) self.type = state["type"] - self.trusted = self._get_trusted(trusted, [np.ndarray] + NUMPY_DTYPE_TYPE_NAMES) + self.trusted = self._get_trusted( + trusted, [np.ndarray] + NUMPY_DTYPE_TYPE_NAMES # type: ignore + ) if self.type == "numpy": self.children = { "content": io.BytesIO(load_context.src.read(state["file"])) diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index 95104dd9..734c4958 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -41,7 +41,7 @@ [ get_type_name(dtype) for dtypes in np.sctypes.values() - for dtype in dtypes + for dtype in dtypes # type: ignore if get_type_name(dtype).startswith("numpy") ] ) From 71818db9766563a79c8849cd34ddd59d51eea7e1 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Tue, 11 Apr 2023 12:02:36 +0200 Subject: [PATCH 5/9] #224: resolving review remarks. --- docs/changes.rst | 5 ++--- docs/persistence.rst | 5 +++-- skops/io/_numpy.py | 2 ++ skops/io/_trusted_types.py | 38 +++++++++--------------------------- skops/io/_utils.py | 34 ++++++++++++++++++++++++++++++++ skops/io/tests/test_audit.py | 24 ----------------------- 6 files changed, 50 insertions(+), 58 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 7a66e335..4134a58e 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -12,7 +12,8 @@ skops Changelog v0.7 ---- - +- All public ``numpy`` ufuncs (Universal Functions) and dtypes are trusted by default + by :func:`.io.load`. :pr:`336` by :user:`Omar Arab Oghli `. v0.6 ---- @@ -36,8 +37,6 @@ v0.6 - Fix issue with persisting :class:`numpy.random.Generator` using the skops format (the object could be loaded correctly but security could not be checked). :pr:`331` by `Benjamin Bossan`_. -- All public ``numpy`` ufuncs (Universal Functions) and dtypes are trusted by default - by :func:`.io.load`. :pr:`336` by :user:`Omar Arab Oghli `. v0.5 ---- diff --git a/docs/persistence.rst b/docs/persistence.rst index 2ed2819e..3e2d43c3 100644 --- a/docs/persistence.rst +++ b/docs/persistence.rst @@ -82,7 +82,7 @@ using :func:`skops.io.get_untrusted_types`: from skops.io import get_untrusted_types unknown_types = get_untrusted_types(file="my-model.skops") print(unknown_types) - ['numpy.float64', 'numpy.int64', 'sklearn.metrics._scorer._passthrough_scorer', + ['sklearn.metrics._scorer._passthrough_scorer', 'xgboost.core.Booster', 'xgboost.sklearn.XGBClassifier'] Note that everything in the above list is safe to load. We already have many @@ -108,7 +108,8 @@ At the moment, ``skops`` cannot persist arbitrary Python code. This means if you have custom functions (say, a custom function to be used with :class:`sklearn.preprocessing.FunctionTransformer`), it will not work. However, most ``numpy`` and ``scipy`` functions should work. Therefore, you can save -objects having references to functions such as ``numpy.sqrt``. +objects having references to functions or universal functions (ufuncs) +such as ``numpy.sqrt``. Command Line Interface ###################### diff --git a/skops/io/_numpy.py b/skops/io/_numpy.py index 881dcaf0..1a6a477f 100644 --- a/skops/io/_numpy.py +++ b/skops/io/_numpy.py @@ -53,6 +53,8 @@ def ndarray_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: class NdArrayNode(Node): + # TODO: NdArrayNode and DtypeNode names lead to confusion, see PR-336 + def __init__( self, state: dict[str, Any], diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index 734c4958..fc3a7698 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -2,7 +2,7 @@ import scipy from sklearn.utils import all_estimators -from ._utils import get_type_name +from ._utils import get_public_type_names, get_type_name PRIMITIVES_TYPES = [int, float, str, bool] @@ -14,35 +14,15 @@ if get_type_name(estimator_class).startswith("sklearn.") ] -SCIPY_UFUNC_TYPE_NAMES = sorted( - set( - [ - get_type_name(getattr(scipy.special, attr)) - for attr in dir(scipy.special) - if isinstance(getattr(scipy.special, attr), np.ufunc) - and get_type_name(getattr(scipy.special, attr)).startswith("scipy") - ] - ) -) +SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(module=scipy.special, _type=np.ufunc) -NUMPY_UFUNC_TYPE_NAMES = sorted( - set( - [ - get_type_name(getattr(np, attr)) - for attr in dir(np) - if isinstance(getattr(np, attr), np.ufunc) - and get_type_name(getattr(np, attr)).startswith("numpy") - ] - ) -) +NUMPY_UFUNC_TYPE_NAMES = get_public_type_names(module=np, _type=np.ufunc) NUMPY_DTYPE_TYPE_NAMES = sorted( - set( - [ - get_type_name(dtype) - for dtypes in np.sctypes.values() - for dtype in dtypes # type: ignore - if get_type_name(dtype).startswith("numpy") - ] - ) + { + type_name + for dtypes in np.sctypes.values() + for dtype in dtypes # type: ignore + if (type_name := get_type_name(dtype)).startswith("numpy") + } ) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index f147d15a..837d7d20 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -4,6 +4,7 @@ import sys from dataclasses import dataclass, field from functools import singledispatch +from types import ModuleType from typing import Any, Type from zipfile import ZipFile @@ -200,3 +201,36 @@ def get_type_paths(types: Any) -> list[str]: types = [types] return [get_type_name(t) if not isinstance(t, str) else t for t in types] + + +def get_public_type_names(module: ModuleType, _type: Type) -> list[str]: + """ + Helper function that gets the type names of all + public objects of the given ``_type`` from the given ``module``, + which start with the root module name. + + Public objects are those that can be read via ``dir(...)``. + + Parameters + ---------- + module: ModuleType + Module under which the public objects are defined. + _type: Type + The type of the objects. + + Returns + ---------- + type_names_list: list of str + The sorted list of type names, all as strings, + e.g. ``["numpy.core._multiarray_umath.absolute"]``. + """ + module_name, _, _ = module.__name__.rpartition(".") + + return sorted( + { + type_name + for attr in dir(module) + if (isinstance(obj := getattr(module, attr), _type)) + and ((type_name := get_type_name(obj)).startswith(module_name)) + } + ) diff --git a/skops/io/tests/test_audit.py b/skops/io/tests/test_audit.py index 564540c1..756e9988 100644 --- a/skops/io/tests/test_audit.py +++ b/skops/io/tests/test_audit.py @@ -4,11 +4,8 @@ from contextlib import suppress from zipfile import ZipFile -import numpy as np import pytest from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import FeatureUnion, Pipeline -from sklearn.preprocessing import FunctionTransformer, StandardScaler from skops.io import dumps, get_untrusted_types from skops.io._audit import Node, audit_tree, check_type, get_tree, temp_setattr @@ -152,27 +149,6 @@ def __init__(self): assert not hasattr(temp, "b") -def test_complex_pipeline_untrusted_set(): - # fmt: off - clf = Pipeline([ - ("features", FeatureUnion([ - ("scaler", StandardScaler()), - ("np.funcs", FunctionTransformer( - func=np.split, - inverse_func=np.angle, - )), - ])), - ("clf", LogisticRegression(random_state=0, solver="liblinear")), - ]) - # fmt: on - - untrusted = get_untrusted_types(data=dumps(clf)) - type_names = [x.split(".")[-1] for x in untrusted] - - # choosing random numpy functions that are yet not considered as default trusted ones. - assert type_names == ["angle", "split"] - - def test_format_object_node(): estimator = LogisticRegression(random_state=0, solver="liblinear") state = get_state(estimator, SaveContext(None)) From 98661bee33e02fcd1540acde9ffb9d676df17bf3 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Thu, 13 Apr 2023 11:49:03 +0200 Subject: [PATCH 6/9] #224: replacing isinstance with issubclass and _type with oftype --- skops/io/_trusted_types.py | 4 ++-- skops/io/_utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index fc3a7698..e48df817 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -14,9 +14,9 @@ if get_type_name(estimator_class).startswith("sklearn.") ] -SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(module=scipy.special, _type=np.ufunc) +SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(module=scipy.special, oftype=np.ufunc) -NUMPY_UFUNC_TYPE_NAMES = get_public_type_names(module=np, _type=np.ufunc) +NUMPY_UFUNC_TYPE_NAMES = get_public_type_names(module=np, oftype=np.ufunc) NUMPY_DTYPE_TYPE_NAMES = sorted( { diff --git a/skops/io/_utils.py b/skops/io/_utils.py index 837d7d20..07918673 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -203,7 +203,7 @@ def get_type_paths(types: Any) -> list[str]: return [get_type_name(t) if not isinstance(t, str) else t for t in types] -def get_public_type_names(module: ModuleType, _type: Type) -> list[str]: +def get_public_type_names(module: ModuleType, oftype: Type) -> list[str]: """ Helper function that gets the type names of all public objects of the given ``_type`` from the given ``module``, @@ -215,7 +215,7 @@ def get_public_type_names(module: ModuleType, _type: Type) -> list[str]: ---------- module: ModuleType Module under which the public objects are defined. - _type: Type + oftype: Type The type of the objects. Returns @@ -230,7 +230,7 @@ def get_public_type_names(module: ModuleType, _type: Type) -> list[str]: { type_name for attr in dir(module) - if (isinstance(obj := getattr(module, attr), _type)) + if (issubclass((obj := getattr(module, attr)).__class__, oftype)) and ((type_name := get_type_name(obj)).startswith(module_name)) } ) From f9e1cbaf3fb3c09065af66e3d8d1f57ecf480c1b Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Thu, 13 Apr 2023 16:44:26 +0200 Subject: [PATCH 7/9] #224: clarifying the confusion and removing extra parentheses. --- skops/io/_numpy.py | 4 +++- skops/io/_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/skops/io/_numpy.py b/skops/io/_numpy.py index 1a6a477f..ec474ec4 100644 --- a/skops/io/_numpy.py +++ b/skops/io/_numpy.py @@ -53,7 +53,9 @@ def ndarray_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: class NdArrayNode(Node): - # TODO: NdArrayNode and DtypeNode names lead to confusion, see PR-336 + # TODO: NdArrayNode is not only responsible for np.arrays + # but also for np.generics, thus the confusion with DTypeNode. + # See PR-336 def __init__( self, diff --git a/skops/io/_utils.py b/skops/io/_utils.py index 07918673..c0b80770 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -230,7 +230,7 @@ def get_public_type_names(module: ModuleType, oftype: Type) -> list[str]: { type_name for attr in dir(module) - if (issubclass((obj := getattr(module, attr)).__class__, oftype)) - and ((type_name := get_type_name(obj)).startswith(module_name)) + if issubclass((obj := getattr(module, attr)).__class__, oftype) + and (type_name := get_type_name(obj)).startswith(module_name) } ) From e0e951f851caad90b03d3ed8171dfd54af7dc323 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sun, 14 May 2023 15:30:03 +0200 Subject: [PATCH 8/9] #224: cleaning up tests with explicit numpy.dtypes --- skops/io/tests/test_external.py | 4 ---- skops/io/tests/test_visualize.py | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index b41253c0..2c90c1cd 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -81,8 +81,6 @@ def trusted(self): "lightgbm.sklearn.LGBMClassifier", "lightgbm.sklearn.LGBMRegressor", "lightgbm.sklearn.LGBMRanker", - "numpy.int32", - "numpy.int64", "sklearn.preprocessing._label.LabelEncoder", ] @@ -329,8 +327,6 @@ def trusted(self): # TODO: adjust once more types are trusted by default return [ "builtins.bytes", - "numpy.float32", - "numpy.float64", "catboost.core.CatBoostClassifier", "catboost.core.CatBoostRegressor", "catboost.core.CatBoostRanker", diff --git a/skops/io/tests/test_visualize.py b/skops/io/tests/test_visualize.py index 4905cea2..65625903 100644 --- a/skops/io/tests/test_visualize.py +++ b/skops/io/tests/test_visualize.py @@ -110,13 +110,10 @@ def sink(nodes_iter, *args, **kwargs): assert len(nodes_unsafe) > 2 assert any("FunctionTransformer" in node.val for node in nodes_unsafe) - @pytest.mark.parametrize( - "trusted", [True, ["numpy.int64", "test_visualize.unsafe_function"]] - ) + @pytest.mark.parametrize("trusted", [True, ["test_visualize.unsafe_function"]]) def test_all_nodes_trusted(self, pipeline, trusted, capsys): # The pipeline contains untrusted type(s), but if we pass trusted=True, # it is not considered untrusted anymore - # TODO: remove numpy.int64 from trusted once it's trusted by default file = sio.dumps(pipeline) sio.visualize(file, show="untrusted", trusted=trusted) expected = "root: sklearn.pipeline.Pipeline" From fb69a4520a0c48f3c9a4751c4189a347959546ed Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Mon, 15 May 2023 12:17:47 +0200 Subject: [PATCH 9/9] Update skops/io/_utils.py Co-authored-by: Benjamin Bossan --- skops/io/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index c0b80770..d929380a 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -206,7 +206,7 @@ def get_type_paths(types: Any) -> list[str]: def get_public_type_names(module: ModuleType, oftype: Type) -> list[str]: """ Helper function that gets the type names of all - public objects of the given ``_type`` from the given ``module``, + public objects of the given ``oftype`` from the given ``module``, which start with the root module name. Public objects are those that can be read via ``dir(...)``.