Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@ jobs:
include:
- python: "3.9"
sklearn_version: "1.1"
numpy_version: "numpy<2"
- python: "3.10"
sklearn_version: "1.2"
numpy_version: "numpy"
- python: "3.11"
sklearn_version: "1.4"
numpy_version: "numpy"
- python: "3.12"
sklearn_version: "nightly"
numpy_version: "numpy"

# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 15
Expand All @@ -52,14 +56,15 @@ jobs:

- name: Install dependencies
run: |
python -m pip install -U pip
pip install "pytest<8"
pip install .[docs,tests]
pip install black=="23.9.1" ruff=="0.0.292" mypy=="1.6.0"
pip uninstall --yes scikit-learn
pip install "${{ matrix.numpy_version }}"
if [ ${{ matrix.sklearn_version }} == "nightly" ];
then pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn;
else pip install "scikit-learn~=${{ matrix.sklearn_version }}";
fi
pip install .[docs,tests]
pip install black=="23.9.1" ruff=="0.0.292" mypy=="1.6.0"
if [ ${{ matrix.os }} == "ubuntu-latest" ];
then sudo apt install pandoc && pandoc --version;
fi
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/deploy-model-card-creator.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: Deploy-Space-Creator

on:
- push
- pull_request

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand Down
31 changes: 19 additions & 12 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@
else:
from typing_extensions import Self

# Repr attributes can be used to control the behavior of repr
aRepr = Repr()
aRepr.maxother = 79
aRepr.maxstring = 79

VALID_TEMPLATES = {item.value for item in Templates}
NEED_SECTION_ERR_MSG = (
"You are trying to {action} but you're using a custom template, please pass the "
Expand Down Expand Up @@ -1316,6 +1311,11 @@ def _add_metrics(

def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]:
"""Yield metadata in yaml format"""
# Repr attributes can be used to control the behavior of repr
aRepr = Repr()
aRepr.maxother = 79
aRepr.maxstring = 79

for key, val in metadata.to_dict().items() if metadata else {}:
yield aRepr.repr(f"metadata.{key}={val},").strip('"').strip("'")

Expand Down Expand Up @@ -1367,11 +1367,18 @@ def _iterate_content(
yield from self._iterate_content(val.subsections, parent_section=title)

@staticmethod
def _format_repr(text: str) -> str:
def _format_repr(title: str, content: str) -> str:
# Remove new lines, multiple spaces, quotation marks, and cap line length
text = text.replace("\n", " ")
text = re.sub(r"\s+", r" ", text)
return aRepr.repr(text).strip('"').strip("'")
content = content.replace("\n", " ")
content = re.sub(r"\s+", r" ", content)

# Repr attributes can be used to control the behavior of repr
aRepr = Repr()
aRepr.maxother = max(3, 79 - len(title))
aRepr.maxstring = max(3, 79 - len(title))

content = aRepr.repr(content).strip('"').strip("'")
return f"{title}={content},"

def __str__(self) -> str:
return self.__repr__()
Expand All @@ -1380,7 +1387,7 @@ def __repr__(self) -> str:
# repr for the model
model = getattr(self, "model", None)
if model:
model_repr = self._format_repr(f"model={repr(self.get_model())},")
model_repr = self._format_repr("model", repr(self.get_model()))
else:
model_repr = None

Expand All @@ -1391,7 +1398,7 @@ def __repr__(self) -> str:
metadata_reprs.append("metadata.widget=[{...}],")
continue

metadata_reprs.append(self._format_repr(f"metadata.{key}={val},"))
metadata_reprs.append(self._format_repr(f"metadata.{key}", repr(val)))
metadata_repr = "\n".join(metadata_reprs)

# repr for contents
Expand All @@ -1403,7 +1410,7 @@ def __repr__(self) -> str:
if content.rstrip("`").rstrip().endswith(CONTENT_PLACEHOLDER):
# if content is just some default text, no need to show it
continue
content_reprs.append(self._format_repr(f"{title}={section},"))
content_reprs.append(self._format_repr(title, repr(section)))
content_repr = "\n".join(content_reprs)

# combine all parts
Expand Down
32 changes: 16 additions & 16 deletions skops/card/tests/test_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ def save_model_to_file(model_instance, suffix):
return save_file_handle, save_file


def reprs_equal(repr1, repr2):
    """Check that repr1 and repr2 are basically equal.

    The first and the last line (the opening and closing of the repr) must
    match exactly and in position. The lines in between must be the same set
    of lines, including duplicates, but their order is ignored.

    Parameters
    ----------
    repr1, repr2 : str
        The multi-line representations to compare.

    Returns
    -------
    bool
        True if both reprs consist of the same lines, disregarding the order
        of the interior lines.
    """
    lines1 = repr1.split("\n")
    lines2 = repr2.split("\n")

    # Different line counts can never be equal; checking this first also keeps
    # the slicing below from treating a 1-line and a 2-line repr as the same.
    if len(lines1) != len(lines2):
        return False

    # With at most two lines there is no interior to reorder, so the reprs
    # have to match line by line.
    if len(lines1) <= 2:
        return lines1 == lines2

    if lines1[0] != lines2[0] or lines1[-1] != lines2[-1]:
        return False
    return sorted(lines1[1:-1]) == sorted(lines2[1:-1])


@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"])
def test_load_model(suffix):
model0 = LinearRegression(n_jobs=123)
Expand Down Expand Up @@ -1294,14 +1302,14 @@ def expected_lines(self):
Card(
model=LinearRegression(fit_intercept=False),
Model description/Training Procedure/Hyperparameters=TableSection(4x2),
Model description/Training Procedure/...</div>,
Model description/Training Procedure/Model Plot=<style>#sk-co...v></div></div>,
Model Card Authors=Jane Doe,
Figures/ROC=PlotSection(ROC.png),
Figures/Confusion matrix=PlotSection(confusion_matrix.jpg),
Model Description=A description,
Search Results=TableSection(3x2),
)
"""
""" # noqa: E501
expected = textwrap.dedent(card_repr).strip()
lines = expected.split("\n")
return lines
Expand All @@ -1310,9 +1318,7 @@ def expected_lines(self):
def test_card_repr(self, card: Card, meth, expected_lines):
result = meth(card)
expected = "\n".join(expected_lines)
expected = re.escape(expected)
expected = expected.replace(r"\.\.\.", ".*")
assert re.match(expected, result)
assert reprs_equal(expected, result)

@pytest.mark.parametrize("meth", [repr, str])
def test_card_repr_empty_card(self, meth):
Expand All @@ -1333,16 +1339,14 @@ def test_very_long_lines_are_shortened(self, card: Card, meth, expected_lines):

# expected results contain 1 line at the very end
extra_line = (
" my_section=very long line very long l... "
"line very long line very long line ,"
" my_section=very long line very long line ve...e very long line "
"very long line ,"
)
expected_lines.insert(-1, extra_line)
expected = "\n".join(expected_lines)
expected = re.escape(expected)
expected = expected.replace(r"\.\.\.", ".*")

result = meth(card)
assert re.match(expected, result)
assert reprs_equal(expected, result)

@pytest.mark.parametrize("meth", [repr, str])
def test_without_model_attribute(self, card: Card, meth, expected_lines):
Expand All @@ -1351,11 +1355,9 @@ def test_without_model_attribute(self, card: Card, meth, expected_lines):
# remove line 1 from expected results, which corresponds to the model
del expected_lines[1]
expected = "\n".join(expected_lines)
expected = re.escape(expected)
expected = expected.replace(r"\.\.\.", ".*")

result = meth(card)
assert re.match(expected, result)
assert reprs_equal(expected, result)

@pytest.mark.parametrize("meth", [repr, str])
def test_with_metadata(self, card: Card, meth, expected_lines):
Expand All @@ -1379,11 +1381,9 @@ def test_with_metadata(self, card: Card, meth, expected_lines):
" metadata.widget=[{...}],",
]
expected = "\n".join(expected_lines[:2] + extra_lines + expected_lines[2:])
expected = re.escape(expected)
expected = expected.replace(r"\.\.\.", ".*")
result = meth(card)

assert re.match(expected, result)
assert reprs_equal(expected, result)


class TestCardModelAttributeIsPath:
Expand Down
27 changes: 23 additions & 4 deletions skops/io/_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,12 @@ def _construct(self):

def random_generator_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
bit_generator_state = get_state(obj.bit_generator.state, save_context)
seed_seq_state = get_state(obj.bit_generator.seed_seq.state, save_context)
res = {
"__class__": obj.__class__.__name__,
"__module__": get_module(type(obj)),
"__loader__": "RandomGeneratorNode",
"content": {"bit_generator": bit_generator_state},
"content": {"bit_generator": bit_generator_state, "seed_seq": seed_seq_state},
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I can't quite remember how it worked: Would adding an item here affect compatibility? I guess it's backwards compatible as witnessed by the test but not forwards compatible? Also, is this a change for numpy v2 or was it an oversight of not having this?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in numpy~=1 we have:

>>> np.random.default_rng().__reduce__()
(<function __generator_ctor at 0x777841d798a0>, ('PCG64', <function __bit_generator_ctor at 0x777841d3cc20>), {'bit_generator': 'PCG64', 'state': {'state': 220935942547292961595576198891696034818, 'inc': 281084058663618135801235667263986488477}, 'has_uint32': 0, 'uinteger': 0})

while numpy=2 does this:

>>> np.random.default_rng().__reduce__()
(<function __generator_ctor at 0x778c98b64b80>, (<numpy.random._pcg64.PCG64 object at 0x778c99543480>,), None)

and that means our check fails. While fixing the check by going recursively through what's in the output of __reduce__, I noticed we're not saving the seed sequence's state. As far as I checked, it doesn't seem to have an effect on the next few generated numbers, but it's something which can be stored and loaded, so I added it.

}
return res

Expand All @@ -196,17 +197,27 @@ def __init__(
self.children = {
"bit_generator_state": get_tree(
state["content"]["bit_generator"], load_context, trusted=trusted
)
),
"seed_seq_state": get_tree(
state["content"]["seed_seq"], load_context, trusted=trusted
),
}
self.trusted = self._get_trusted(trusted, [np.random.Generator])

def _construct(self):
# first restore the state of the bit generator
seed_seq_cls = gettype(
"numpy.random.bit_generator",
"SeedSequence",
)
seed_seq_state = self.children["seed_seq_state"].construct()
seed_seq = seed_seq_cls(**seed_seq_state)

bit_generator_state = self.children["bit_generator_state"].construct()
bit_generator_cls = gettype(
"numpy.random", bit_generator_state["bit_generator"]
)
bit_generator = bit_generator_cls()
bit_generator = bit_generator_cls(seed_seq)
bit_generator.state = bit_generator_state

# next create the generator instance
Expand Down Expand Up @@ -260,7 +271,15 @@ def _construct(self):
try:
# From numpy=1.25.0 dispatching for `__array_function__` is done via
# a C wrapper: https://github.com/numpy/numpy/pull/23020
from numpy.core._multiarray_umath import _ArrayFunctionDispatcher
try:
# numpy>=2
from numpy._core._multiarray_umath import ( # type: ignore
_ArrayFunctionDispatcher,
)
except ImportError:
from numpy.core._multiarray_umath import ( # type: ignore
_ArrayFunctionDispatcher,
)

GET_STATE_DISPATCH_FUNCTIONS.append((_ArrayFunctionDispatcher, function_get_state))
except ImportError:
Expand Down
2 changes: 1 addition & 1 deletion skops/io/_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# them. Old protocols are found in the 'old/' directory, with the protocol
# version appended to the corresponding module name.
modules = ["._general", "._numpy", "._scipy", "._sklearn", "._quantile_forest"]
modules.extend([".old._general_v0", ".old._numpy_v0"])
modules.extend([".old._general_v0", ".old._numpy_v0", ".old._numpy_v1"])
for module_name in modules:
# register exposed functions for get_state and get_tree
module = importlib.import_module(module_name, package="skops.io")
Expand Down
2 changes: 1 addition & 1 deletion skops/io/_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
version Y instead.

"""
PROTOCOL = 1
PROTOCOL = 2
5 changes: 4 additions & 1 deletion skops/io/_quantile_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

try:
from quantile_forest._quantile_forest_fast import QuantileForest
except ImportError:
except Exception:
# Mostly ImportError, but in case of older QuantileForest and numpy>=2 it
# could also be ValueError.
# In general, a failed import here should not raise an error on our side.
QuantileForest = None


Expand Down
13 changes: 10 additions & 3 deletions skops/io/_trusted_types.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import scipy
from sklearn.utils import all_estimators
Expand All @@ -14,15 +16,20 @@
if get_type_name(estimator_class).startswith("sklearn.")
]

SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(module=scipy.special, oftype=np.ufunc)
with warnings.catch_warnings():
# This is to suppress deprecation warning coming from the fact that scipy reports
# numpy.core for ufuncs, and numpy.core is deprecated and renamed to numpy._core
warnings.simplefilter("ignore", category=DeprecationWarning)
SCIPY_UFUNC_TYPE_NAMES = get_public_type_names(
module=scipy.special, oftype=np.ufunc
)

NUMPY_UFUNC_TYPE_NAMES = get_public_type_names(module=np, oftype=np.ufunc)

NUMPY_DTYPE_TYPE_NAMES = sorted(
{
type_name
for dtypes in np.sctypes.values()
for dtype in dtypes # type: ignore
for dtype in np.sctypeDict.values()
if (type_name := get_type_name(dtype)).startswith("numpy")
}
)
8 changes: 6 additions & 2 deletions skops/io/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import importlib
import sys
import warnings
from dataclasses import dataclass, field
from functools import singledispatch
from types import ModuleType
Expand Down Expand Up @@ -46,8 +47,11 @@ def whichmodule(obj: Any, name: str) -> str:
):
continue
try:
if _getattribute(module, name)[0] is obj:
return module_name
with warnings.catch_warnings():
# this is to silence numpy.core import warnings
warnings.simplefilter("ignore", DeprecationWarning)
if _getattribute(module, name)[0] is obj:
return module_name
except AttributeError:
pass
return "__main__"
Expand Down
44 changes: 44 additions & 0 deletions skops/io/old/_numpy_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

from typing import Any, Optional, Sequence

import numpy as np

from skops.io._audit import Node, get_tree
from skops.io._utils import LoadContext, gettype

PROTOCOL = 1


class RandomGeneratorNode(Node):
    """Node that rebuilds a random generator from its saved bit generator state.

    This variant is registered under ``PROTOCOL`` (see ``NODE_TYPE_MAPPING``
    in this module). It only reads ``state["content"]["bit_generator"]``; no
    other entry of ``content`` is consulted.
    """

    def __init__(
        self,
        state: dict[str, Any],
        load_context: LoadContext,
        trusted: Optional[Sequence[str]] = None,
    ) -> None:
        super().__init__(state, load_context, trusted)

        # The only child is the tree describing the bit generator's state.
        bit_generator_tree = get_tree(
            state["content"]["bit_generator"], load_context, trusted=trusted
        )
        self.children = {"bit_generator_state": bit_generator_tree}
        self.trusted = self._get_trusted(trusted, [np.random.Generator])

    def _construct(self):
        """Instantiate the bit generator, restore its state, and wrap it."""
        restored_state = self.children["bit_generator_state"].construct()

        # The saved state names its own bit generator class, which is looked
        # up in ``numpy.random`` and created with default arguments before
        # the saved state is assigned to it.
        bit_generator = gettype("numpy.random", restored_state["bit_generator"])()
        bit_generator.state = restored_state

        # Finally build the generator class recorded for this node around it.
        generator_cls = gettype(self.module_name, self.class_name)
        return generator_cls(bit_generator=bit_generator)


# Maps a (node name, protocol version) tuple to the node class that creates
# the instance for that name under that protocol.
NODE_TYPE_MAPPING = {
    ("RandomGeneratorNode", PROTOCOL): RandomGeneratorNode,
}
Loading