skops-dev · adrinjalali · Jul 3, 2024 · Jun 6, 2024 · Jun 21, 2024 · Jun 23, 2024
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
@@ -21,12 +21,16 @@ jobs:
         include:
           - python: "3.9"
             sklearn_version: "1.1"
+            numpy_version: "numpy<2"
           - python: "3.10"
             sklearn_version: "1.2"
+            numpy_version: "numpy"
           - python: "3.11"
             sklearn_version: "1.4"
+            numpy_version: "numpy"
           - python: "3.12"
             sklearn_version: "nightly"
+            numpy_version: "numpy"
 
     # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 15
@@ -52,14 +56,15 @@ jobs:
 
       - name: Install dependencies
         run: |
+          python -m pip install -U pip
           pip install "pytest<8"
-          pip install .[docs,tests]
-          pip install black=="23.9.1" ruff=="0.0.292" mypy=="1.6.0"
-          pip uninstall --yes scikit-learn
+          pip install "${{ matrix.numpy_version }}"
           if [ ${{ matrix.sklearn_version }} == "nightly" ];
             then pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn;
             else pip install "scikit-learn~=${{ matrix.sklearn_version }}";
           fi
+          pip install .[docs,tests]
+          pip install black=="23.9.1" ruff=="0.0.292" mypy=="1.6.0"
           if [ ${{ matrix.os }} == "ubuntu-latest" ];
             then sudo apt install pandoc && pandoc --version;
           fi

diff --git a/.github/workflows/deploy-model-card-creator.yml b/.github/workflows/deploy-model-card-creator.yml
@@ -2,6 +2,7 @@ name: Deploy-Space-Creator
 
 on:
   - push
+  - pull_request
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
@@ -29,11 +29,6 @@
 else:
     from typing_extensions import Self
 
-# Repr attributes can be used to control the behavior of repr
-aRepr = Repr()
-aRepr.maxother = 79
-aRepr.maxstring = 79
-
 VALID_TEMPLATES = {item.value for item in Templates}
 NEED_SECTION_ERR_MSG = (
     "You are trying to {action} but you're using a custom template, please pass the "
@@ -1316,6 +1311,11 @@ def _add_metrics(
 
     def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]:
         """Yield metadata in yaml format"""
+        # Repr attributes can be used to control the behavior of repr
+        aRepr = Repr()
+        aRepr.maxother = 79
+        aRepr.maxstring = 79
+
         for key, val in metadata.to_dict().items() if metadata else {}:
             yield aRepr.repr(f"metadata.{key}={val},").strip('"').strip("'")
 
@@ -1367,11 +1367,18 @@ def _iterate_content(
                 yield from self._iterate_content(val.subsections, parent_section=title)
 
     @staticmethod
-    def _format_repr(text: str) -> str:
+    def _format_repr(title: str, content: str) -> str:
+        """Build one ``title=content,`` line of the card repr.
+
+        Whitespace runs in ``content`` (newlines included) are collapsed to
+        single spaces. The result is then abbreviated through a ``Repr``
+        instance whose ``maxstring``/``maxother`` limits are set to
+        ``max(3, 79 - len(title))``, so a longer title leaves less room for the
+        content, and surrounding quotation marks are stripped.
+
+        Parameters
+        ----------
+        title : str
+            Left-hand side of the line; emitted verbatim, never shortened.
+
+        content : str
+            Text to normalize and shorten.
+
+        Returns
+        -------
+        str
+            ``title``, ``=``, the shortened content and a trailing comma.
+        """
         # Remove new lines, multiple spaces, quotation marks, and cap line length
-        text = text.replace("\n", " ")
-        text = re.sub(r"\s+", r" ", text)
-        return aRepr.repr(text).strip('"').strip("'")
+        content = content.replace("\n", " ")
+        content = re.sub(r"\s+", r" ", content)
+
+        # Repr attributes can be used to control the behavior of repr
+        aRepr = Repr()
+        aRepr.maxother = max(3, 79 - len(title))
+        aRepr.maxstring = max(3, 79 - len(title))
+
+        content = aRepr.repr(content).strip('"').strip("'")
+        return f"{title}={content},"
 
     def __str__(self) -> str:
         return self.__repr__()
@@ -1380,7 +1387,7 @@ def __repr__(self) -> str:
         # repr for the model
         model = getattr(self, "model", None)
         if model:
-            model_repr = self._format_repr(f"model={repr(self.get_model())},")
+            model_repr = self._format_repr("model", repr(self.get_model()))
         else:
             model_repr = None
 
@@ -1391,7 +1398,7 @@ def __repr__(self) -> str:
                 metadata_reprs.append("metadata.widget=[{...}],")
                 continue
 
-            metadata_reprs.append(self._format_repr(f"metadata.{key}={val},"))
+            metadata_reprs.append(self._format_repr(f"metadata.{key}", repr(val)))
         metadata_repr = "\n".join(metadata_reprs)
 
         # repr for contents
@@ -1403,7 +1410,7 @@ def __repr__(self) -> str:
             if content.rstrip("`").rstrip().endswith(CONTENT_PLACEHOLDER):
                 # if content is just some default text, no need to show it
                 continue
-            content_reprs.append(self._format_repr(f"{title}={section},"))
+            content_reprs.append(self._format_repr(title, repr(section)))
         content_repr = "\n".join(content_reprs)
 
         # combine all parts

diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
@@ -47,6 +47,14 @@ def save_model_to_file(model_instance, suffix):
     return save_file_handle, save_file
 
 
+def reprs_equal(repr1, repr2):
+    """Check that repr1 and repr2 are equal up to the order of their inner lines.
+
+    The first and the last line must match exactly. The lines in between are
+    compared as sorted lists, so their order is ignored.
+
+    Comparing the outer lines explicitly matters: slicing them away without
+    checking them would make any two single-line strings compare equal and
+    would hide a wrong first or last line.
+    """
+    lines1, lines2 = repr1.split("\n"), repr2.split("\n")
+    return (
+        lines1[0] == lines2[0]
+        and lines1[-1] == lines2[-1]
+        and sorted(lines1[1:-1]) == sorted(lines2[1:-1])
+    )
+
+
 @pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"])
 def test_load_model(suffix):
     model0 = LinearRegression(n_jobs=123)
@@ -1294,14 +1302,14 @@ def expected_lines(self):
         Card(
           model=LinearRegression(fit_intercept=False),
           Model description/Training Procedure/Hyperparameters=TableSection(4x2),
-          Model description/Training Procedure/...</div>,
+          Model description/Training Procedure/Model Plot=<style>#sk-co...v></div></div>,
           Model Card Authors=Jane Doe,
           Figures/ROC=PlotSection(ROC.png),
           Figures/Confusion matrix=PlotSection(confusion_matrix.jpg),
           Model Description=A description,
           Search Results=TableSection(3x2),
         )
-        """
+        """  # noqa: E501
         expected = textwrap.dedent(card_repr).strip()
         lines = expected.split("\n")
         return lines
@@ -1310,9 +1318,7 @@ def expected_lines(self):
     def test_card_repr(self, card: Card, meth, expected_lines):
         result = meth(card)
         expected = "\n".join(expected_lines)
-        expected = re.escape(expected)
-        expected = expected.replace(r"\.\.\.", ".*")
-        assert re.match(expected, result)
+        assert reprs_equal(expected, result)
 
     @pytest.mark.parametrize("meth", [repr, str])
     def test_card_repr_empty_card(self, meth):
@@ -1333,16 +1339,14 @@ def test_very_long_lines_are_shortened(self, card: Card, meth, expected_lines):
 
         # expected results contain 1 line at the very end
         extra_line = (
-            "  my_section=very long line very long l... "
-            "line very long line very long line ,"
+            "  my_section=very long line very long line ve...e very long line "
+            "very long line ,"
         )
         expected_lines.insert(-1, extra_line)
         expected = "\n".join(expected_lines)
-        expected = re.escape(expected)
-        expected = expected.replace(r"\.\.\.", ".*")
 
         result = meth(card)
-        assert re.match(expected, result)
+        assert reprs_equal(expected, result)
 
     @pytest.mark.parametrize("meth", [repr, str])
     def test_without_model_attribute(self, card: Card, meth, expected_lines):
@@ -1351,11 +1355,9 @@ def test_without_model_attribute(self, card: Card, meth, expected_lines):
         # remove line 1 from expected results, which corresponds to the model
         del expected_lines[1]
         expected = "\n".join(expected_lines)
-        expected = re.escape(expected)
-        expected = expected.replace(r"\.\.\.", ".*")
 
         result = meth(card)
-        assert re.match(expected, result)
+        assert reprs_equal(expected, result)
 
     @pytest.mark.parametrize("meth", [repr, str])
     def test_with_metadata(self, card: Card, meth, expected_lines):
@@ -1379,11 +1381,9 @@ def test_with_metadata(self, card: Card, meth, expected_lines):
             "  metadata.widget=[{...}],",
         ]
         expected = "\n".join(expected_lines[:2] + extra_lines + expected_lines[2:])
-        expected = re.escape(expected)
-        expected = expected.replace(r"\.\.\.", ".*")
         result = meth(card)
 
-        assert re.match(expected, result)
+        assert reprs_equal(expected, result)
 
 
 class TestCardModelAttributeIsPath:

diff --git a/skops/io/_numpy.py b/skops/io/_numpy.py
@@ -176,11 +176,12 @@ def _construct(self):
 
 def random_generator_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
+    """Return the state dict describing ``obj`` for persistence.
+
+    The ``content`` entry carries two states, both obtained through
+    ``get_state``: the ``state`` of ``obj.bit_generator`` and the ``state`` of
+    that bit generator's ``seed_seq``. ``__loader__`` is set to
+    ``"RandomGeneratorNode"`` (presumably the node used to load it back).
+    """
     bit_generator_state = get_state(obj.bit_generator.state, save_context)
+    seed_seq_state = get_state(obj.bit_generator.seed_seq.state, save_context)
     res = {
         "__class__": obj.__class__.__name__,
         "__module__": get_module(type(obj)),
         "__loader__": "RandomGeneratorNode",
-        "content": {"bit_generator": bit_generator_state},
+        "content": {"bit_generator": bit_generator_state, "seed_seq": seed_seq_state},
     }
     return res
 
@@ -196,17 +197,27 @@ def __init__(
         self.children = {
             "bit_generator_state": get_tree(
                 state["content"]["bit_generator"], load_context, trusted=trusted
-            )
+            ),
+            "seed_seq_state": get_tree(
+                state["content"]["seed_seq"], load_context, trusted=trusted
+            ),
         }
         self.trusted = self._get_trusted(trusted, [np.random.Generator])
 
     def _construct(self):
-        # first restore the state of the bit generator
+        # rebuild the seed sequence, then restore the state of the bit generator
+        seed_seq_cls = gettype(
+            "numpy.random.bit_generator",
+            "SeedSequence",
+        )
+        seed_seq_state = self.children["seed_seq_state"].construct()
+        seed_seq = seed_seq_cls(**seed_seq_state)
+
         bit_generator_state = self.children["bit_generator_state"].construct()
         bit_generator_cls = gettype(
             "numpy.random", bit_generator_state["bit_generator"]
         )
-        bit_generator = bit_generator_cls()
+        bit_generator = bit_generator_cls(seed_seq)
         bit_generator.state = bit_generator_state
 
         # next create the generator instance
@@ -260,7 +271,15 @@ def _construct(self):
 try:
     # From numpy=1.25.0 dispatching for `__array_function__` is done via
     # a C wrapper: https://github.com/numpy/numpy/pull/23020
-    from numpy.core._multiarray_umath import _ArrayFunctionDispatcher
+    try:
+        # numpy>=2
+        from numpy._core._multiarray_umath import (  # type: ignore
+            _ArrayFunctionDispatcher,
+        )
+    except ImportError:
+        from numpy.core._multiarray_umath import (  # type: ignore
+            _ArrayFunctionDispatcher,
+        )
 
     GET_STATE_DISPATCH_FUNCTIONS.append((_ArrayFunctionDispatcher, function_get_state))
 except ImportError:

diff --git a/skops/io/_persist.py b/skops/io/_persist.py
@@ -16,7 +16,7 @@
 # them. Old protocols are found in the 'old/' directory, with the protocol
 # version appended to the corresponding module name.
 modules = ["._general", "._numpy", "._scipy", "._sklearn", "._quantile_forest"]
-modules.extend([".old._general_v0", ".old._numpy_v0"])
+modules.extend([".old._general_v0", ".old._numpy_v0", ".old._numpy_v1"])
 for module_name in modules:
     # register exposed functions for get_state and get_tree
     module = importlib.import_module(module_name, package="skops.io")

diff --git a/skops/io/_protocol.py b/skops/io/_protocol.py
@@ -23,4 +23,4 @@
 version Y instead.
 
 """
-PROTOCOL = 1
+PROTOCOL = 2
diff --git a/skops/io/_quantile_forest.py b/skops/io/_quantile_forest.py
@@ -8,7 +8,10 @@
 
 try:
     from quantile_forest._quantile_forest_fast import QuantileForest
-except ImportError:
+except Exception:
+    # Usually an ImportError, but an older quantile_forest combined with
+    # numpy>=2 can raise ValueError instead. Either way, a failed import here
+    # must not raise on our side, so every Exception is caught.
     QuantileForest = None
 
 

diff --git a/skops/io/_trusted_types.py b/skops/io/_trusted_types.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import scipy
 from sklearn.utils import all_estimators
@@ -14,15 +16,20 @@
     if get_type_name(estimator_class).startswith("sklearn.")
 ]
 
-SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(module=scipy.special, oftype=np.ufunc)
+with warnings.catch_warnings():
+    # This is to suppress deprecation warning coming from the fact that scipy reports
+    # numpy.core for ufuncs, and numpy.core is deprecated and renamed to numpy._core
+    warnings.simplefilter("ignore", category=DeprecationWarning)
+    SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(
+        module=scipy.special, oftype=np.ufunc
+    )
 
 NUMPY_UFUNC_TYPE_NAMES = get_public_type_names(module=np, oftype=np.ufunc)
 
 NUMPY_DTYPE_TYPE_NAMES = sorted(
     {
         type_name
-        for dtypes in np.sctypes.values()
-        for dtype in dtypes  # type: ignore
+        for dtype in np.sctypeDict.values()
         if (type_name := get_type_name(dtype)).startswith("numpy")
     }
 )
diff --git a/skops/io/_utils.py b/skops/io/_utils.py
@@ -2,6 +2,7 @@
 
 import importlib
 import sys
+import warnings
 from dataclasses import dataclass, field
 from functools import singledispatch
 from types import ModuleType
@@ -46,8 +47,11 @@ def whichmodule(obj: Any, name: str) -> str:
         ):
             continue
         try:
-            if _getattribute(module, name)[0] is obj:
-                return module_name
+            with warnings.catch_warnings():
+                # this is to silence numpy.core import warnings
+                warnings.simplefilter("ignore", DeprecationWarning)
+                if _getattribute(module, name)[0] is obj:
+                    return module_name
         except AttributeError:
             pass
     return "__main__"

diff --git a/skops/io/old/_numpy_v1.py b/skops/io/old/_numpy_v1.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from typing import Any, Optional, Sequence
+
+import numpy as np
+
+from skops.io._audit import Node, get_tree
+from skops.io._utils import LoadContext, gettype
+
+PROTOCOL = 1
+
+
+class RandomGeneratorNode(Node):
+    """Loader node for random generator states registered under protocol 1.
+
+    Unlike a layout that also stores a ``seed_seq`` entry, the state handled
+    here only has ``content["bit_generator"]``, so the bit generator is
+    created without arguments before its state is restored.
+    """
+
+    def __init__(
+        self,
+        state: dict[str, Any],
+        load_context: LoadContext,
+        trusted: Optional[Sequence[str]] = None,
+    ) -> None:
+        super().__init__(state, load_context, trusted)
+        # the only child is the tree built from the saved bit generator state
+        self.children = {
+            "bit_generator_state": get_tree(
+                state["content"]["bit_generator"], load_context, trusted=trusted
+            )
+        }
+        # NOTE(review): presumably np.random.Generator is the default trusted
+        # type when ``trusted`` is not given; confirm against Node._get_trusted
+        self.trusted = self._get_trusted(trusted, [np.random.Generator])
+
+    def _construct(self):
+        # first restore the state of the bit generator
+        bit_generator_state = self.children["bit_generator_state"].construct()
+        # the concrete bit generator class is looked up in numpy.random by the
+        # name stored under the "bit_generator" key of the saved state
+        bit_generator_cls = gettype(
+            "numpy.random", bit_generator_state["bit_generator"]
+        )
+        bit_generator = bit_generator_cls()
+        bit_generator.state = bit_generator_state
+
+        # next create the generator instance
+        return gettype(self.module_name, self.class_name)(bit_generator=bit_generator)
+
+
+# tuples of type and function that creates the instance of that type
+NODE_TYPE_MAPPING = {
+    ("RandomGeneratorNode", PROTOCOL): RandomGeneratorNode,
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ name: Deploy-Space-Creator @@
     on:
       - push
+      - pull_request
     concurrency:
       group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ Expand Down @@