From aef013114e5d9106a82e243e384301cbed451ece Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 9 Dec 2022 13:35:16 +0100 Subject: [PATCH 01/10] MNT drop python=3.7 support --- .github/workflows/build-test.yml | 11 ++++---- pyproject.toml | 2 +- setup.py | 6 ++--- skops/hub_utils/tests/test_hf_hub.py | 12 ++++----- skops/utils/fixes.py | 37 ------------------------- skops/utils/tests/test_fixes.py | 40 ---------------------------- 6 files changed, 16 insertions(+), 92 deletions(-) delete mode 100644 skops/utils/tests/test_fixes.py diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 25066be1..5fe8c6d1 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -17,18 +17,19 @@ jobs: fail-fast: false # need to see which ones fail matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python: ["3.7", "3.8", "3.9", "3.10"] + python: ["3.8", "3.9", "3.10", "3.11"] # this is to make the CI run on different sklearn versions include: - - python: "3.7" - sklearn_version: "0.24.0" - python: "3.8" - sklearn_version: "1.0.0" + sklearn_version: "1.0" - python: "3.9" - sklearn_version: "1.1.0" + sklearn_version: "1.1" - python: "3.10" + sklearn_version: "1.2" + - python: "3.11" sklearn_version: "nightly" + # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 15 diff --git a/pyproject.toml b/pyproject.toml index b19d1aa4..2ee0d477 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 88 -target_version = ['py37', 'py38', 'py39', 'py310'] +target_version = ['py38', 'py39', 'py310', 'py311'] preview = true [tool.isort] diff --git a/setup.py b/setup.py index df25333b..69d800a5 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ # This is a bit (!) hackish: we are setting a global variable so that the # main modelcard __init__ can detect if it is being loaded by the setup # routine, to avoid attempting to load components. -builtins.__SKOPS_SETUP__ = True +builtins.__SKOPS_SETUP__ = True # type: ignore import skops # noqa @@ -58,13 +58,13 @@ def setup_package(): "Operating System :: Unix", "Operating System :: MacOS", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", ], - python_requires=">=3.7", + python_requires=">=3.8", install_requires=min_deps.tag_to_packages["install"], extras_require={ "docs": min_deps.tag_to_packages["docs"], diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 5aa229d1..7e3033f3 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -36,7 +36,7 @@ _validate_folder, ) from skops.hub_utils.tests.common import HF_HUB_TOKEN -from skops.utils.fixes import metadata, path_unlink +from skops.utils.fixes import metadata iris = load_iris(as_frame=True, return_X_y=False) diabetes = load_diabetes(as_frame=True, return_X_y=False) @@ -85,7 +85,7 @@ def classifier_pickle(repo_path): pickle.dump(clf, f) yield path finally: - path_unlink(path, missing_ok=True) + path.unlink(missing_ok=True) CONFIG = { @@ -104,7 +104,7 @@ def config_json(repo_path): json.dump(CONFIG, f) yield path finally: - path_unlink(path, missing_ok=True) + path.unlink(missing_ok=True) def test_validate_folder(config_json): @@ -304,7 +304,7 @@ def test_model_file_does_not_exist_raises(repo_path, config_json): task="tabular-classification", data=iris.data, ) - path_unlink(model_path, missing_ok=True) + model_path.unlink(missing_ok=True) def test_init_empty_model_file_errors(repo_path, config_json): @@ -326,7 +326,7 @@ def test_init_empty_model_file_errors(repo_path, config_json): task="tabular-classification", data=iris.data, ) - path_unlink(model_path, missing_ok=True) + model_path.unlink(missing_ok=True) @pytest.mark.network @@ -459,7 +459,7 @@ def test_inference( # cleanup client.delete_repo(repo_id=repo_id, token=HF_HUB_TOKEN) - path_unlink(model_path, missing_ok=True) + model_path.unlink(missing_ok=True) assert np.allclose(output, y_pred) diff --git a/skops/utils/fixes.py b/skops/utils/fixes.py index e9d83558..11fee9c7 100644 --- a/skops/utils/fixes.py +++ b/skops/utils/fixes.py @@ -2,8 +2,6 @@ # versions of a dependency. import sys -from contextlib import suppress -from pathlib import Path if sys.version_info >= (3, 8): # py>=3.8 @@ -20,38 +18,3 @@ # if you're removing this, you should also remove the dependency from # _min_dependencies.py from typing_extensions import Literal # noqa - - -def path_unlink(path: Path, missing_ok: bool = False) -> None: - """Remove this file or symbolic link - - Parameters - ---------- - path : pathlib.Path - Path to the file to be removed - - missing_ok : bool (default=False) - If False, ``FileNotFoundError`` is raised if the path does not exist. If - True, ``FileNotFoundError`` exceptions will be ignored (same behavior as - the POSIX ``rm -f`` command). - - Raises - ------ - FileNotFoundError - Is raised if ``missing_ok`` is False and the file is missing. - - """ - # Python 3.7 does not support the missing_ok argument. - # One we move to Python >= 3.8, this function can just call - # Path.unlink(missing_ok) - if not missing_ok: # default behavior - path.unlink() - return - - if sys.version_info >= (3, 8): - path.unlink(missing_ok=missing_ok) - return - - # for Python 3.7, just catch the error - with suppress(FileNotFoundError): - path.unlink() diff --git a/skops/utils/tests/test_fixes.py b/skops/utils/tests/test_fixes.py deleted file mode 100644 index 2a1d9991..00000000 --- a/skops/utils/tests/test_fixes.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Tests for skops.utils.fixes.py""" - -import tempfile -from pathlib import Path - -import pytest - - -class TestPathUnlink: - @pytest.fixture(scope="class") - def path_unlink(self): - from skops.utils.fixes import path_unlink - - return path_unlink - - @pytest.fixture(scope="class") - def tempdir(self): - with tempfile.TemporaryDirectory(prefix="skops-test") as directory: - yield Path(directory) - - def test_path_unlink_file_exists(self, path_unlink, tempdir): - path = tempdir / "some-file" - path.touch() - assert path.exists() - - path_unlink(path) - assert not path.exists() - - def test_path_unlink_file_does_not_exist_raises(self, path_unlink, tempdir): - path = tempdir / "some-file" - assert not path.exists() - - with pytest.raises(FileNotFoundError): - path_unlink(path) - - def test_path_unlink_file_does_not_missing_ok(self, path_unlink, tempdir): - path = tempdir / "some-file" - assert not path.exists() - # does not raise an error - path_unlink(path, missing_ok=True) From 8a64e422f8a6ebfd38b61da1e3998aa5e07e5bcb Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 12 Dec 2022 11:38:16 +0100 Subject: [PATCH 02/10] add futurewarning to the list of ignored ones --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2ee0d477..fdc272a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ filterwarnings = [ "ignore:The \\'sym_pos\\' keyword is deprecated and should be replaced:DeprecationWarning", # https://github.com/scikit-learn/scikit-learn/pull/23633 "ignore:Unlike other reduction functions:FutureWarning", + # https://github.com/scikit-learn/scikit-learn/pull/25157 + "ignore:open_text is deprecated:FutureWarning", ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From 7fd557c30f2a30d9ffa0d71e3f22cc6c1529356e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 12 Dec 2022 15:43:30 +0100 Subject: [PATCH 03/10] it's a DeprecationWarning --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fdc272a7..be7c59cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ filterwarnings = [ # https://github.com/scikit-learn/scikit-learn/pull/23633 "ignore:Unlike other reduction functions:FutureWarning", # https://github.com/scikit-learn/scikit-learn/pull/25157 - "ignore:open_text is deprecated:FutureWarning", + "ignore:open_text is deprecated:DeprecationWarning", ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From 00a3917d995b258b08a2a147a374a61fb32247b7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 13 Dec 2022 12:06:35 +0100 Subject: [PATCH 04/10] change message --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index be7c59cc..c2a37115 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ filterwarnings = [ # https://github.com/scikit-learn/scikit-learn/pull/23633 "ignore:Unlike other reduction functions:FutureWarning", # https://github.com/scikit-learn/scikit-learn/pull/25157 - "ignore:open_text is deprecated:DeprecationWarning", + "ignore:Use files\(\) instead:DeprecationWarning", ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From b371b560d2eadcec76b3de514acdba7604583341 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 13 Dec 2022 13:12:51 +0100 Subject: [PATCH 05/10] try again --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c2a37115..4420fe59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ filterwarnings = [ # https://github.com/scikit-learn/scikit-learn/pull/23633 "ignore:Unlike other reduction functions:FutureWarning", # https://github.com/scikit-learn/scikit-learn/pull/25157 - "ignore:Use files\(\) instead:DeprecationWarning", + "ignore:Use files() instead:DeprecationWarning", ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From 6d63c102b7c8cfed271579de76b0ce8ca030e045 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 13 Dec 2022 14:32:10 +0100 Subject: [PATCH 06/10] use Ben's suggestion --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4420fe59..c957920e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ filterwarnings = [ # https://github.com/scikit-learn/scikit-learn/pull/23633 "ignore:Unlike other reduction functions:FutureWarning", # https://github.com/scikit-learn/scikit-learn/pull/25157 - "ignore:Use files() instead:DeprecationWarning", + "ignore:\\w+ is deprecated. Use files\\(\\) instead:DeprecationWarning" ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From e0833e468d67d19d799331964457ae119332a4e2 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 13 Jan 2023 15:30:15 +0100 Subject: [PATCH 07/10] catboost doesn't support 311 --- skops/_min_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 35219a12..565263fc 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -29,7 +29,8 @@ # required for persistence tests of external libraries "lightgbm": ("3", "tests", None), "xgboost": ("1.6", "tests", None), - "catboost": ("1.0", "tests", None), + # TODO: remove condition when catboost supports python 3.11 + "catboost": ("1.0", "tests", "python_version < '3.11'"), } From d38e072635bb53dde84d61e15d3e1730b608889d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 13 Jan 2023 15:35:41 +0100 Subject: [PATCH 08/10] Revert "FIX: Persistence tests failing when using sklearn nightly (#260)" This reverts commit 1b3c674cde51dfa853ebcf6844f10a2a9ee2cb97. --- skops/io/tests/_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/skops/io/tests/_utils.py b/skops/io/tests/_utils.py index 6b02c3af..ead14b29 100644 --- a/skops/io/tests/_utils.py +++ b/skops/io/tests/_utils.py @@ -126,13 +126,6 @@ def _assert_vals_equal(val1, val2): def assert_params_equal(params1, params2): - # due to https://github.com/scikit-learn/scikit-learn/pull/22094, after - # loading an sklearn estimator, there might be an entry called - # "__sklearn_pickle_version__" in the __dict__ that wasn't there before. We - # just ignore it. - params1.pop("__sklearn_pickle_version__", None) - params2.pop("__sklearn_pickle_version__", None) - # helper function to compare estimator dictionaries of parameters assert len(params1) == len(params2) assert set(params1.keys()) == set(params2.keys()) From c44b33976308750a5c7ce7151f35346e0f09a1ce Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 13 Jan 2023 16:10:46 +0100 Subject: [PATCH 09/10] check for None --- skops/io/tests/_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skops/io/tests/_utils.py b/skops/io/tests/_utils.py index ead14b29..9bb31a32 100644 --- a/skops/io/tests/_utils.py +++ b/skops/io/tests/_utils.py @@ -127,6 +127,8 @@ def _assert_vals_equal(val1, val2): def assert_params_equal(params1, params2): # helper function to compare estimator dictionaries of parameters + if params1 is None and params2 is None: + return assert len(params1) == len(params2) assert set(params1.keys()) == set(params2.keys()) for key in params1: From e9fe32f162662e334699a9295ab8375bfe15deff Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 19 Jan 2023 16:51:38 +0100 Subject: [PATCH 10/10] apply [almost] Benjamin's patch --- skops/io/_general.py | 9 +++++---- skops/io/_sklearn.py | 13 ++++++++++--- skops/io/tests/_utils.py | 7 +++++-- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/skops/io/_general.py b/skops/io/_general.py index 10ef9a0f..9bc2254a 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -401,10 +401,11 @@ def _construct(self): return instance attrs = self.children["attrs"].construct() - if hasattr(instance, "__setstate__"): - instance.__setstate__(attrs) - else: - instance.__dict__.update(attrs) + if attrs is not None: + if hasattr(instance, "__setstate__"): + instance.__setstate__(attrs) + else: + instance.__dict__.update(attrs) return instance diff --git a/skops/io/_sklearn.py b/skops/io/_sklearn.py index 9da5392a..14ba4a87 100644 --- a/skops/io/_sklearn.py +++ b/skops/io/_sklearn.py @@ -72,13 +72,15 @@ def reduce_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: # call __getstate__ directly. attrs = reduce[2] elif hasattr(obj, "__getstate__"): - attrs = obj.__getstate__() + # since python311 __getstate__ is defined for `object` and might return + # None + attrs = obj.__getstate__() or {} elif hasattr(obj, "__dict__"): attrs = obj.__dict__ else: attrs = {} - if not isinstance(attrs, dict): + if not isinstance(attrs, (dict, tuple)): raise UnsupportedTypeException( f"Objects of type {res['__class__']} not supported yet" ) @@ -119,8 +121,13 @@ def _construct(self): if hasattr(instance, "__setstate__"): instance.__setstate__(attrs) - else: + elif isinstance(attrs, dict): instance.__dict__.update(attrs) + else: + # we (probably) got tuple attrs but cannot setstate with them + raise UnsupportedTypeException( + f"Objects of type {constructor} are not supported yet" + ) return instance diff --git a/skops/io/tests/_utils.py b/skops/io/tests/_utils.py index 9bb31a32..9081a9fc 100644 --- a/skops/io/tests/_utils.py +++ b/skops/io/tests/_utils.py @@ -67,10 +67,13 @@ def _assert_tuples_equal(val1, val2): def _assert_vals_equal(val1, val2): - if hasattr(val1, "__getstate__"): + if type(val1) == type: # e.g. could be np.int64 + assert val1 is val2 + elif hasattr(val1, "__getstate__") and (val1.__getstate__() is not None): # This includes BaseEstimator since they implement __getstate__ and # that returns the parameters as well. - # + # Since Python 3.11, all objects have a __getstate__ but they return + # None by default, in which case this check is not performed. # Some objects return a tuple of parameters, others a dict. state1 = val1.__getstate__() state2 = val2.__getstate__()