Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ v0.5
----
- Support more array-like data types for tabular data and list-like data types
for text data. :pr:`179` by `Francesco Cariaggi`_.
- Add an option ``use_intelex`` to :func:`skops.hub_utils.init`. When enabled,
the Hugging Face inference API runs the model with Intel's
scikit-learn-intelex library, which can accelerate inference times. :pr:`267`
by `Benjamin Bossan`_.

v0.4
----
Expand Down
3 changes: 3 additions & 0 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData:
if task:
card_data.tags += [task]
card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") # type: ignore
if config.get("sklearn", {}).get("use_intelex"):
card_data.tags.append("scikit-learn-intelex")
Comment thread
adrinjalali marked this conversation as resolved.

example_input = config.get("sklearn", {}).get("example_input", None)
# Documentation on what the widget expects:
# https://huggingface.co/docs/hub/models-widgets-examples
Expand Down
32 changes: 32 additions & 0 deletions skops/card/tests/test_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,38 @@ def test_metadata_model_format_skops(
metadata = metadata_load(local_path=Path(destination_path) / "README.md")
assert metadata["model_format"] == "skops"

def test_metadata_tags_without_sklearn_intelex_tag(
    self, destination_path, iris_data, iris_pkl_file
):
    """The intelex tag must be absent when ``use_intelex`` is not passed."""
    features, _ = iris_data

    # ``use_intelex`` is deliberately omitted so the default is exercised
    init_kwargs = {
        "model": iris_pkl_file,
        "requirements": [],
        "dst": destination_path,
        "task": "tabular-classification",
        "data": features,
    }
    hub_utils.init(**init_kwargs)

    tags = metadata_from_config(destination_path).tags
    assert "scikit-learn-intelex" not in tags

def test_metadata_tags_with_sklearn_intelex_tag(
    self, destination_path, iris_data, iris_pkl_file
):
    """The intelex tag must be present when ``use_intelex=True`` is passed."""
    features, _ = iris_data

    init_kwargs = {
        "model": iris_pkl_file,
        "requirements": [],
        "dst": destination_path,
        "task": "tabular-classification",
        "data": features,
        "use_intelex": True,
    }
    hub_utils.init(**init_kwargs)

    tags = metadata_from_config(destination_path).tags
    assert "scikit-learn-intelex" in tags


@pytest.mark.xfail(reason="dynamic adjustment when model changes not implemented yet")
class TestModelDynamicUpdate:
Expand Down
35 changes: 27 additions & 8 deletions skops/hub_utils/_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def _create_config(
"pickle",
"auto",
] = "auto",
use_intelex: bool = False,
) -> None:
"""Write the configuration into a ``config.json`` file.

Expand Down Expand Up @@ -248,16 +249,20 @@ def _create_config(
expected to be an array-like. Otherwise, it is expected to be an
sequence of strings.

model_format: str
model_format: str (default="auto")
The format used to persist the model. Can be ``"auto"``, ``"skops"``
or ``"pickle"``. Defaults to ``"auto"``, which would mean:

- ``"pickle"`` if the extension is one of ``{".pickle", ".pkl", ".joblib"}``
- ``"skops"`` if the extension is ``".skops"``

Returns
-------
None
use_intelex: bool (default=False)
Whether to enable ``scikit-learn-intelex``. This can accelerate some
sklearn models by a large factor with the right hardware. In most cases,
enabling this option should not break any code, even if the model was
not initially trained with scikit-learn intelex and even if the hardware
does not support it. For more info, see
https://intel.github.io/scikit-learn-intelex/.
"""
# so that we don't have to explicitly add keys and they're added as a
# dictionary if they are not found
Expand All @@ -276,11 +281,13 @@ def recursively_default_dict() -> MutableMapping:
"Cannot determine the input file format. Please indicate the format using"
" the `model_format` argument."
)

config = recursively_default_dict()
config["sklearn"]["model"]["file"] = str(model_path)
config["sklearn"]["environment"] = requirements
config["sklearn"]["task"] = task
config["sklearn"]["model_format"] = model_format
config["sklearn"]["use_intelex"] = use_intelex

if "tabular" in task:
config["sklearn"]["example_input"] = _get_example_input_from_tabular_data(data)
Expand Down Expand Up @@ -335,6 +342,7 @@ def init(
"pickle",
"auto",
] = "auto",
use_intelex: bool = False,
) -> None:
"""Initialize a scikit-learn based Hugging Face repo.

Expand Down Expand Up @@ -375,13 +383,17 @@ def init(
:class:`numpy.ndarray`. If ``task`` is ``"text-classification"`` or
``"text-regression"``, the data needs to be a ``list`` of strings.

model_format: str
model_format: str (default="auto")
The format the model was persisted in. Can be ``"auto"``, ``"skops"``
or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension.

Returns
-------
None
use_intelex: bool (default=False)
Whether to enable ``scikit-learn-intelex``. This can accelerate some
sklearn models by a large factor with the right hardware. In most cases,
enabling this option should not break any code, even if the model was
not initially trained with scikit-learn intelex and even if the hardware
does not support it. For more info, see
https://intel.github.io/scikit-learn-intelex/.
"""
dst = Path(dst)
if dst.exists() and bool(next(dst.iterdir(), None)):
Expand All @@ -396,6 +408,12 @@ def init(

dst.mkdir(parents=True, exist_ok=True)

# add intelex requirement, if it's used and not already in requirements
if use_intelex and not any(
r.startswith("scikit-learn-intelex") for r in requirements
):
requirements.append("scikit-learn-intelex")
Comment thread
adrinjalali marked this conversation as resolved.

try:
shutil.copy2(src=model, dst=dst)

Expand All @@ -407,6 +425,7 @@ def init(
task=task,
data=data,
model_format=model_format,
use_intelex=use_intelex,
)
except Exception:
shutil.rmtree(dst)
Expand Down
48 changes: 48 additions & 0 deletions skops/hub_utils/tests/test_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,3 +697,51 @@ def test_adding_existing_file_raises(self, init_path, some_file_0):
)
with pytest.raises(FileExistsError, match=msg):
add_files(some_file_0, dst=init_path)


class TestUseIntelex:
    """Tests related to the usage of scikit-learn intelex, see #251."""

    def make_config(self, model, requirements, **kwargs):
        """Initialize a throwaway repo via ``init`` and return its config.

        Parameters
        ----------
        model
            Forwarded to ``init`` as ``model``.

        requirements
            Forwarded to ``init`` as ``requirements``.

        **kwargs
            Extra keyword arguments forwarded to ``init`` (e.g.
            ``use_intelex``).

        Returns
        -------
        config
            Whatever ``get_config`` returns for the initialized directory.
        """
        # Everything lives inside a parent temporary directory that is removed
        # when the ``with`` block exits, so no repo is left behind per test.
        with tempfile.TemporaryDirectory() as parent:
            # reserve a unique name, then delete it so that ``init`` receives
            # a destination that does not exist yet
            dir_path = tempfile.mkdtemp(dir=parent)
            shutil.rmtree(dir_path)

            init(
                model=model,
                dst=dir_path,
                task="tabular-classification",
                data=iris.data,
                requirements=requirements,
                **kwargs,
            )
            # the config is read before the temporary directory is cleaned up
            return get_config(dir_path)

    def test_no_intelex(self, classifier):
        """By default, intelex is neither enabled nor added as requirement."""
        config = self.make_config(model=classifier, requirements=["foobar"])
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is False
        assert not any(r.startswith("scikit-learn-intelex") for r in environment)

    def test_use_intelex_but_not_explicitly_in_requirements(self, classifier):
        """Intelex is added automatically when enabled but not requested."""
        config = self.make_config(
            model=classifier, requirements=["foobar"], use_intelex=True
        )
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is True
        assert "scikit-learn-intelex" in environment

    def test_use_intelex_explicitly_in_requirements(self, classifier):
        """A user-pinned intelex requirement is kept and not duplicated."""
        reqs = ["foobar", "scikit-learn-intelex==2023.0.0"]
        config = self.make_config(model=classifier, requirements=reqs, use_intelex=True)
        environment = config["sklearn"]["environment"]

        assert config["sklearn"]["use_intelex"] is True
        # the unpinned entry must not have been appended next to the pinned one
        assert "scikit-learn-intelex" not in environment
        assert "scikit-learn-intelex==2023.0.0" in environment