From 2e98fafb3bc288589e5664d42f1d3be1c344325c Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 27 Jun 2023 14:54:49 +0200 Subject: [PATCH 1/2] MNT: Update repo URL of sklearn nightly build See this comment: https://github.com/scikit-learn/scikit-learn/issues/25273#issuecomment-1609430993 --- .github/workflows/build-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 1b3df64a..7051a361 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -63,7 +63,7 @@ jobs: pip install black=="22.6.0" isort=="5.10.1" mypy=="1.0.0" pip uninstall --yes scikit-learn if [ ${{ matrix.sklearn_version }} == "nightly" ]; - then pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn; + then pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn; else pip install "scikit-learn~=${{ matrix.sklearn_version }}"; fi if [ ${{ matrix.os }} == "ubuntu-latest" ]; From cb7d10026f99104a9c1788ecfaee3a7cada6884e Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Wed, 5 Jul 2023 12:15:17 +0200 Subject: [PATCH 2/2] New solution: test with sparse matrix failing Instead of restoring the private attributes after loading a sparse matrix, just don't test for these attributes. They are not restored, but that shouldn't affect users. --- skops/io/tests/_utils.py | 15 +++++++++++++++ skops/io/tests/test_persist.py | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/skops/io/tests/_utils.py b/skops/io/tests/_utils.py index d355f959..58857c3c 100644 --- a/skops/io/tests/_utils.py +++ b/skops/io/tests/_utils.py @@ -133,10 +133,25 @@ def _assert_vals_equal(val1, val2): _assert_generic_objects_equal(val1, val2) +def _clean_params(params): + # this function deals with cleaning special parameters that for one reason + # or another should be removed or modified. + params = params.copy() + + # see #375 + keys_to_remove = ["_has_canonical_format", "_has_sorted_indices"] + for key in keys_to_remove: + params.pop(key, None) + + return params + + def assert_params_equal(params1, params2): # helper function to compare estimator dictionaries of parameters if params1 is None and params2 is None: return + + params1, params2 = _clean_params(params1), _clean_params(params2) assert len(params1) == len(params2) assert set(params1.keys()) == set(params2.keys()) for key in params1: diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index eb69e70a..25ebf37d 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -1047,3 +1047,21 @@ def test_compression_level(): dumped_compressed = dumps(model, compression=ZIP_DEFLATED, compresslevel=9) # This reduces the size substantially assert len(dumped_raw) > 5 * len(dumped_compressed) + + +@pytest.mark.parametrize("call_has_canonical_format", [False, True]) +def test_sparse_matrix(call_has_canonical_format): + # see https://github.com/skops-dev/skops/pull/375 + + # note: this behavior is already implicitly tested by sklearn estimators + # that use sparse matrices under the hood (tfidf) but it is better to check + # the behavior explicitly + x = sparse.csr_matrix((3, 4)) + if call_has_canonical_format: + x.has_canonical_format + + dumped = dumps(x) + untrusted_types = get_untrusted_types(data=dumped) + y = loads(dumped, trusted=untrusted_types) + + assert_params_equal(x.__dict__, y.__dict__)