From 895f84a217038595b91de82ff18e2748ed5a2a7a Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Mon, 6 Feb 2023 12:18:48 +0100 Subject: [PATCH 1/5] #224: getting {numpy,scipy}.ufuncs with different versions --- .pre-commit-config.yaml | 2 +- skops/io/_trusted_types.py | 30 ++++++++++++++++- skops/io/_utils.py | 69 +++++++++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e72ddf0..c218d7e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - id: flake8 types: [file, python] - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index 1ef1b826..42fab392 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -1,6 +1,8 @@ +import numpy as np +import scipy from sklearn.utils import all_estimators -from ._utils import get_type_name +from ._utils import all_typed_instances, get_type_name PRIMITIVES_TYPES = [int, float, str, bool] @@ -11,3 +13,29 @@ for _, estimator_class in all_estimators() if get_type_name(estimator_class).startswith("sklearn.") ] + +NUMPY_UFUNC_TYPE_NAMES_V1 = sorted( + set( + [ + get_type_name(getattr(np, attr)) + for attr in dir(np) + if isinstance(getattr(np, attr), np.ufunc) + and get_type_name(getattr(np, attr)).startswith("numpy") + ] + ) +) + +NUMPY_UFUNC_TYPE_NAMES_V2 = all_typed_instances(np, np.ufunc) + +SCIPY_UFUNC_TYPE_NAMES_V1 = sorted( + set( + [ + get_type_name(getattr(scipy.special, attr)) + for attr in dir(scipy.special) + if isinstance(getattr(scipy.special, attr), np.ufunc) + and get_type_name(getattr(scipy.special, attr)).startswith("scipy") + ] + ) +) + +SCIPY_UFUNC_TYPE_NAMES_V2 = all_typed_instances(scipy, np.ufunc) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index 17eaf8c1..a9992174 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -1,10 +1,13 @@ from __future__ import annotations import importlib +import inspect +import pkgutil import sys from dataclasses import dataclass, field from functools import singledispatch -from typing import Any, Type +from types import ModuleType +from typing import Any, Optional, Type from zipfile import ZipFile @@ -201,3 +204,67 @@ def get_type_paths(types: Any) -> list[str]: types = [types] return [get_type_name(t) if not isinstance(t, str) else t for t in types] + + +def all_typed_instances( + root_module: ModuleType, + base_type: Type, + modules_to_ignore: Optional[list[str]] = None, +) -> list[str]: + """Helper function that takes in root_module, + and deeply looks for all object that is instance of _type + and returns a list of them. + + Parameters + ---------- + root_module: ModuleType + Root modules to start from. + + base_type: Type + Base type who is being looking for. + + modules_to_ignore: list[str] + Modules to ignore during the walk. + + Returns + ---------- + types_list: list of str + The list of types, all as strings, e.g. ``["numpy.core._multiarray_umath.multiply"]``. + """ + if not root_module or not base_type: + return [] + + _MODULES_TO_IGNORE = ["tests", "setup", "conftest"] + + _SUBSTRINGS_TO_IGNORE = [".__", "test"] + + if modules_to_ignore is None: + modules_to_ignore = _MODULES_TO_IGNORE + else: + modules_to_ignore += _MODULES_TO_IGNORE + + typed_instances = [] + for _, module_name, _ in pkgutil.walk_packages( + root_module.__path__, prefix=f"{root_module.__name__}." + ): + module_parts = module_name.split(".") + if any(part in modules_to_ignore for part in module_parts) or any( + substring in module_name for substring in _SUBSTRINGS_TO_IGNORE + ): + continue + + try: + module = importlib.import_module(module_name) + except Exception: + continue + + typed_members = [ + get_type_name(member[1]) + for member in inspect.getmembers(module) + if isinstance(member[1], base_type) + and get_type_name(member[1]).startswith(root_module.__name__) + ] + + typed_instances.extend(typed_members) + + return sorted(set(typed_instances)) From 3abf3002dd333cdfab9f4e6d2f9cfe4ad5e21ebf Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Fri, 10 Feb 2023 18:54:57 +0100 Subject: [PATCH 2/5] #224: trusting scipy.special ufuncs; updating FunctionNode and embedding the test within estimators_fitted. --- skops/io/_general.py | 10 +++-- skops/io/_trusted_types.py | 19 +--------- skops/io/_utils.py | 69 +--------------------------------- skops/io/tests/test_persist.py | 4 +- 4 files changed, 13 insertions(+), 89 deletions(-) diff --git a/skops/io/_general.py b/skops/io/_general.py index 9bc2254a..cbe2f0c8 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -10,7 +10,11 @@ import numpy as np from ._audit import Node, get_tree -from ._trusted_types import PRIMITIVE_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES +from ._trusted_types import ( + PRIMITIVE_TYPE_NAMES, + SCIPY_UFUNC_TYPE_NAMES, + SKLEARN_ESTIMATOR_TYPE_NAMES, +) from ._utils import ( LoadContext, SaveContext, @@ -195,7 +199,7 @@ def __init__( ) -> None: super().__init__(state, load_context, trusted) # TODO: what do we trust? - self.trusted = self._get_trusted(trusted, []) + self.trusted = self._get_trusted(trusted, default=SCIPY_UFUNC_TYPE_NAMES) self.children = {"content": state["content"]} def _construct(self): @@ -212,7 +216,7 @@ def _get_function_name(self) -> str: ) def get_unsafe_set(self) -> set[str]: - if self.trusted is True: + if self.trusted is True or self._get_function_name() in self.trusted: return set() return {self._get_function_name()} diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py index 42fab392..39e55573 100644 --- a/skops/io/_trusted_types.py +++ b/skops/io/_trusted_types.py @@ -2,7 +2,7 @@ import scipy from sklearn.utils import all_estimators -from ._utils import all_typed_instances, get_type_name +from ._utils import get_type_name PRIMITIVES_TYPES = [int, float, str, bool] @@ -14,20 +14,7 @@ if get_type_name(estimator_class).startswith("sklearn.") ] -NUMPY_UFUNC_TYPE_NAMES_V1 = sorted( - set( - [ - get_type_name(getattr(np, attr)) - for attr in dir(np) - if isinstance(getattr(np, attr), np.ufunc) - and get_type_name(getattr(np, attr)).startswith("numpy") - ] - ) -) - -NUMPY_UFUNC_TYPE_NAMES_V2 = all_typed_instances(np, np.ufunc) - -SCIPY_UFUNC_TYPE_NAMES_V1 = sorted( +SCIPY_UFUNC_TYPE_NAMES = sorted( set( [ get_type_name(getattr(scipy.special, attr)) @@ -37,5 +24,3 @@ ] ) ) - -SCIPY_UFUNC_TYPE_NAMES_V2 = all_typed_instances(scipy, np.ufunc) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index a9992174..17eaf8c1 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -1,13 +1,10 @@ from __future__ import annotations import importlib -import inspect -import pkgutil import sys from dataclasses import dataclass, field from functools import singledispatch -from types import ModuleType -from typing import Any, Optional, Type +from typing import Any, Type from zipfile import ZipFile @@ -204,67 +201,3 @@ def get_type_paths(types: Any) -> list[str]: types = [types] return [get_type_name(t) if not isinstance(t, str) else t for t in types] - - -def all_typed_instances( - root_module: ModuleType, - base_type: Type, - modules_to_ignore: Optional[list[str]] = None, -) -> list[str]: - """Helper function that takes in root_module, - and deeply looks for all object that is instance of _type - and returns a list of them. - - Parameters - ---------- - root_module: ModuleType - Root modules to start from. - - base_type: Type - Base type who is being looking for. - - modules_to_ignore: list[str] - Modules to ignore during the walk. - - Returns - ---------- - types_list: list of str - The list of types, all as strings, e.g. ``["numpy.core._multiarray_umath.multiply"]``. - """ - if not root_module or not base_type: - return [] - - _MODULES_TO_IGNORE = ["tests", "setup", "conftest"] - - _SUBSTRINGS_TO_IGNORE = [".__", "test"] - - if modules_to_ignore is None: - modules_to_ignore = _MODULES_TO_IGNORE - else: - modules_to_ignore += _MODULES_TO_IGNORE - - typed_instances = [] - for _, module_name, _ in pkgutil.walk_packages( - root_module.__path__, prefix=f"{root_module.__name__}." - ): - module_parts = module_name.split(".") - if any(part in modules_to_ignore for part in module_parts) or any( - substring in module_name for substring in _SUBSTRINGS_TO_IGNORE - ): - continue - - try: - module = importlib.import_module(module_name) - except Exception: - continue - - typed_members = [ - get_type_name(member[1]) - for member in inspect.getmembers(module) - if isinstance(member[1], base_type) - and get_type_name(member[1]).startswith(root_module.__name__) - ] - - typed_instances.extend(typed_members) - - return sorted(set(typed_instances)) diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index bd9c1e20..b1ad3945 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -51,7 +51,7 @@ from skops.io import dump, dumps, get_untrusted_types, load, loads from skops.io._audit import NODE_TYPE_MAPPING, get_tree from skops.io._sklearn import UNSUPPORTED_TYPES -from skops.io._trusted_types import SKLEARN_ESTIMATOR_TYPE_NAMES +from skops.io._trusted_types import SCIPY_UFUNC_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES from skops.io._utils import LoadContext, SaveContext, _get_state, get_state from skops.io.exceptions import UnsupportedTypeException from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal @@ -345,6 +345,8 @@ def test_can_persist_fitted(estimator): assert_params_equal(estimator.__dict__, loaded.__dict__) assert not any(type_ in SKLEARN_ESTIMATOR_TYPE_NAMES for type_ in untrusted_types) + # TODO: implement function testing all ufuncs. + assert not any(type_ in SCIPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) assert_method_outputs_equal(estimator, loaded, X) From 2423b8fdb52dbb5679287337d58d6081e5aaf803 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Sat, 11 Feb 2023 16:46:01 +0100 Subject: [PATCH 3/5] #224: testing persisting all scipy_ufuncs; changelog updated --- docs/changes.rst | 5 +++-- skops/io/tests/test_persist.py | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index b2bdfe68..38803e28 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,4 +1,4 @@ -.. include:: _authors.rst +ds.. include:: _authors.rst .. _changelog: @@ -12,7 +12,8 @@ skops Changelog v0.6 ---- - Added tabular regression example. :pr: `254` by `Thomas Lazarus` - +- All public ``scipy.special`` ufuncs (Universal Functions) are trusted by default. + :pr:`295` by :user:`Omar Arab Oghli `. v0.5 ---- - Added CLI entrypoint support (:func:`.cli.entrypoint.main_cli`) diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index b1ad3945..e7c5a1e0 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -52,7 +52,7 @@ from skops.io._audit import NODE_TYPE_MAPPING, get_tree from skops.io._sklearn import UNSUPPORTED_TYPES from skops.io._trusted_types import SCIPY_UFUNC_TYPE_NAMES, SKLEARN_ESTIMATOR_TYPE_NAMES -from skops.io._utils import LoadContext, SaveContext, _get_state, get_state +from skops.io._utils import LoadContext, SaveContext, _get_state, get_state, gettype from skops.io.exceptions import UnsupportedTypeException from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal @@ -221,6 +221,15 @@ def _tested_estimators(type_filter=None): ) +def _tested_ufuncs(): + for ufunc_name in SCIPY_UFUNC_TYPE_NAMES: + parts = ufunc_name.split(".") + module_name = ".".join(parts[:-1]) + ufunc_name = parts[-1] + + yield gettype(module_name=module_name, cls_or_func=ufunc_name) + + def _unsupported_estimators(type_filter=None): for name, Estimator in all_estimators(type_filter=type_filter): if Estimator not in UNSUPPORTED_TYPES: @@ -345,11 +354,18 @@ def test_can_persist_fitted(estimator): assert_params_equal(estimator.__dict__, loaded.__dict__) assert not any(type_ in SKLEARN_ESTIMATOR_TYPE_NAMES for type_ in untrusted_types) - # TODO: implement function testing all ufuncs. assert not any(type_ in SCIPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) assert_method_outputs_equal(estimator, loaded, X) +@pytest.mark.parametrize("ufunc", _tested_ufuncs(), ids=SCIPY_UFUNC_TYPE_NAMES) +def test_can_trust_ufuncs(ufunc): + dumped = dumps(ufunc) + untrusted_types = get_untrusted_types(data=dumped) + assert not any(type_ in SCIPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) + # TODO: extend with numpy ufuncs + + @pytest.mark.parametrize( "estimator", _unsupported_estimators(), ids=_get_check_estimator_ids ) From 853d67d8381d33f1e2d8d09a9964cde8bf7a4fb3 Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Mon, 13 Feb 2023 17:50:33 +0100 Subject: [PATCH 4/5] #224: applying code-review remarks. --- docs/changes.rst | 2 +- skops/io/_general.py | 2 +- skops/io/tests/test_persist.py | 9 +++------ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 38803e28..bf8211f0 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,4 +1,4 @@ -ds.. include:: _authors.rst +.. include:: _authors.rst .. _changelog: diff --git a/skops/io/_general.py b/skops/io/_general.py index cbe2f0c8..51a184f8 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -216,7 +216,7 @@ def _get_function_name(self) -> str: ) def get_unsafe_set(self) -> set[str]: - if self.trusted is True or self._get_function_name() in self.trusted: + if (self.trusted is True) or (self._get_function_name() in self.trusted): return set() return {self._get_function_name()} diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index e7c5a1e0..c12aa8d7 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -222,11 +222,8 @@ def _tested_estimators(type_filter=None): def _tested_ufuncs(): - for ufunc_name in SCIPY_UFUNC_TYPE_NAMES: - parts = ufunc_name.split(".") - module_name = ".".join(parts[:-1]) - ufunc_name = parts[-1] - + for full_name in SCIPY_UFUNC_TYPE_NAMES: + module_name, _, ufunc_name = full_name.rpartition(".") yield gettype(module_name=module_name, cls_or_func=ufunc_name) @@ -362,7 +359,7 @@ def test_can_persist_fitted(estimator): def test_can_trust_ufuncs(ufunc): dumped = dumps(ufunc) untrusted_types = get_untrusted_types(data=dumped) - assert not any(type_ in SCIPY_UFUNC_TYPE_NAMES for type_ in untrusted_types) + assert len(untrusted_types) == 0 # TODO: extend with numpy ufuncs From ad89ae3d9ece8d2d56bb118062870e9727cb603d Mon Sep 17 00:00:00 2001 From: Omar Arab Oghli Date: Tue, 14 Feb 2023 12:05:35 +0100 Subject: [PATCH 5/5] Update docs/changes.rst Co-authored-by: Benjamin Bossan --- docs/changes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index bf8211f0..28fa9975 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -12,8 +12,9 @@ skops Changelog v0.6 ---- - Added tabular regression example. :pr: `254` by `Thomas Lazarus` -- All public ``scipy.special`` ufuncs (Universal Functions) are trusted by default. - :pr:`295` by :user:`Omar Arab Oghli `. +- All public ``scipy.special`` ufuncs (Universal Functions) are trusted by default + by :func:`.io.load`. :pr:`295` by :user:`Omar Arab Oghli `. + v0.5 ---- - Added CLI entrypoint support (:func:`.cli.entrypoint.main_cli`)