From f5e83a9bc6c9c31c0240e3809545879ac58166a2 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Tue, 1 Nov 2022 22:04:19 +0100 Subject: [PATCH 01/23] feat: generate README.md in hub_utils.init --- skops/hub_utils/_hf_hub.py | 86 +++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 6d361972..f550c999 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -13,7 +13,7 @@ from typing import Any, List, MutableMapping, Optional, Union import numpy as np -from huggingface_hub import HfApi, InferenceApi, snapshot_download +from huggingface_hub import CardData, HfApi, InferenceApi, ModelCard, snapshot_download from ..utils.fixes import Literal @@ -213,6 +213,83 @@ def recursively_default_dict() -> MutableMapping: dump_json(Path(dst) / "config.json", config) +def _create_readme( + *, + model_path: Union[str, Path], + requirements: List[str], + dst: Union[str, Path], + task: Literal[ + "tabular-classification", + "tabular-regression", + "text-classification", + "text-regression", + ], + data, +) -> None: + """Write the metadata into a ``README.md`` file + + Parameters + ---------- + model_path : str, or Path + The relative path (from the repo root) to the model file. + + requirements : list of str + A list of required packages. The versions are then extracted from the + current environment. + + dst : str, or Path + The path to an existing folder where the config file should be created. + + task: "tabular-classification", "tabular-regression", + "text-classification", / + or "text-regression" + The task of the model, which determines the input and output type of + the model. It can be one of: ``tabular-classification``, + ``tabular-regression``, ``text-classification``, ``text-regression``. + + data: array-like + The input to the model. This is used for two purposes: + + 1. 
Save an example input to the model, which is used by + HuggingFace's backend and shown in the widget of the model's + page. + 2. Store the columns and their order of the input, which is used by + HuggingFace's backend to pass the data in the right form to the + model. + + The first 3 input values are used as example inputs. + + Returns + ------- + None + """ + card_data = CardData() + card_data.library_name = "sklearn" + card_data.tags = ["sklearn", "skops"] + card_data.task = task + if task: + card_data.tags += [task] + card_data.model_file = str(model_path) + + if "tabular" in task: + example_input = _get_example_input(data) + elif "text" in task: + if isinstance(data, list) and all(isinstance(x, str) for x in data): + example_input = {"data": data[:3]} + else: + raise ValueError("The data needs to be a list of strings.") + + # Documentation on what the widget expects: + # https://huggingface.co/docs/hub/models-widgets-examples + if example_input: + if "tabular" in task: + card_data.widget = {"structuredData": example_input} + # TODO: add text data example here. 
+ + card = ModelCard.from_template(card_data=card_data) + card.save(Path(dst) / "README.md") + + def _check_model_file(path: str | Path) -> Path: """Perform sanity checks on the model file @@ -320,6 +397,13 @@ def init( task=task, data=data, ) + _create_readme( + model_path=model_name, + requirements=requirements, + dst=dst, + task=task, + data=data, + ) except Exception: shutil.rmtree(dst) raise From f0e9683c6a27385e8eac14cd470243a19a9b276c Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Wed, 2 Nov 2022 18:02:32 +0100 Subject: [PATCH 02/23] ref: replace _create_readme function with fewer lines --- skops/hub_utils/_hf_hub.py | 94 ++++---------------------------------- 1 file changed, 9 insertions(+), 85 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index f550c999..e2abf9cb 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -10,10 +10,13 @@ import shutil import warnings from pathlib import Path +from pickle import load from typing import Any, List, MutableMapping, Optional, Union import numpy as np -from huggingface_hub import CardData, HfApi, InferenceApi, ModelCard, snapshot_download +from huggingface_hub import HfApi, InferenceApi, snapshot_download + +from skops import card from ..utils.fixes import Literal @@ -213,83 +216,6 @@ def recursively_default_dict() -> MutableMapping: dump_json(Path(dst) / "config.json", config) -def _create_readme( - *, - model_path: Union[str, Path], - requirements: List[str], - dst: Union[str, Path], - task: Literal[ - "tabular-classification", - "tabular-regression", - "text-classification", - "text-regression", - ], - data, -) -> None: - """Write the metadata into a ``README.md`` file - - Parameters - ---------- - model_path : str, or Path - The relative path (from the repo root) to the model file. - - requirements : list of str - A list of required packages. The versions are then extracted from the - current environment. 
- - dst : str, or Path - The path to an existing folder where the config file should be created. - - task: "tabular-classification", "tabular-regression", - "text-classification", / - or "text-regression" - The task of the model, which determines the input and output type of - the model. It can be one of: ``tabular-classification``, - ``tabular-regression``, ``text-classification``, ``text-regression``. - - data: array-like - The input to the model. This is used for two purposes: - - 1. Save an example input to the model, which is used by - HuggingFace's backend and shown in the widget of the model's - page. - 2. Store the columns and their order of the input, which is used by - HuggingFace's backend to pass the data in the right form to the - model. - - The first 3 input values are used as example inputs. - - Returns - ------- - None - """ - card_data = CardData() - card_data.library_name = "sklearn" - card_data.tags = ["sklearn", "skops"] - card_data.task = task - if task: - card_data.tags += [task] - card_data.model_file = str(model_path) - - if "tabular" in task: - example_input = _get_example_input(data) - elif "text" in task: - if isinstance(data, list) and all(isinstance(x, str) for x in data): - example_input = {"data": data[:3]} - else: - raise ValueError("The data needs to be a list of strings.") - - # Documentation on what the widget expects: - # https://huggingface.co/docs/hub/models-widgets-examples - if example_input: - if "tabular" in task: - card_data.widget = {"structuredData": example_input} - # TODO: add text data example here. 
- - card = ModelCard.from_template(card_data=card_data) - card.save(Path(dst) / "README.md") - - def _check_model_file(path: str | Path) -> Path: """Perform sanity checks on the model file @@ -397,13 +323,11 @@ def init( task=task, data=data, ) - _create_readme( - model_path=model_name, - requirements=requirements, - dst=dst, - task=task, - data=data, - ) + + with open(model, "rb") as f: + model = load(f) + model_card = card.Card(model, metadata=card.metadata_from_config(dst)) + model_card.save(dst / "README.md") except Exception: shutil.rmtree(dst) raise From 1aeb14cae2d9841166e6cd2e934f0d8af06f9c78 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Thu, 3 Nov 2022 16:09:01 +0100 Subject: [PATCH 03/23] test create model card in hub_utils.init --- skops/hub_utils/tests/test_hf_hub.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 590b0169..ab31cb3b 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -271,6 +271,23 @@ def test_init(classifier_pickle, config_json): ) +def test_init_modelcard_creation(classifier_pickle, config_json): + # create a temp directory and delete it, we just need a unique name. 
+ dir_path = tempfile.mkdtemp() + shutil.rmtree(dir_path) + + version = metadata.version("scikit-learn") + init( + model=classifier_pickle, + requirements=[f'scikit-learn="{version}"'], + dst=dir_path, + task="tabular-classification", + data=iris.data, + ) + _validate_folder(path=dir_path) + assert os.path.isfile(Path(dir_path) / "README.md") + + def test_init_no_warning_or_error(classifier_pickle, config_json): # for the happy path, there should be no warning dir_path = tempfile.mkdtemp() From 95c0e1bf2185cf7e1353269422caa2a8a7d4bdc3 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Thu, 3 Nov 2022 16:12:09 +0100 Subject: [PATCH 04/23] test override model card after created by hub_utils.init --- skops/hub_utils/tests/test_hf_hub.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index ab31cb3b..0daf2cab 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -288,6 +288,32 @@ def test_init_modelcard_creation(classifier_pickle, config_json): assert os.path.isfile(Path(dir_path) / "README.md") +def test_override_init_modelcard(classifier_pickle, config_json): + # create a temp directory and delete it, we just need a unique name. 
+ dir_path = tempfile.mkdtemp() + shutil.rmtree(dir_path) + + version = metadata.version("scikit-learn") + init( + model=classifier_pickle, + requirements=[f'scikit-learn="{version}"'], + dst=dir_path, + task="tabular-classification", + data=iris.data, + ) + _validate_folder(path=dir_path) + t0 = os.path.getmtime(Path(dir_path) / "README.md") + + # override existent modelcard created by init + model = get_classifier() + model_card = card.Card(model, metadata=card.metadata_from_config(Path(dir_path))) + model_card.save(Path(dir_path) / "README.md") + t1 = os.path.getmtime(Path(dir_path) / "README.md") + + # compare the times at which the files were last modified + assert t0 != t1 + + def test_init_no_warning_or_error(classifier_pickle, config_json): # for the happy path, there should be no warning dir_path = tempfile.mkdtemp() From 4b6cb7392faeaf004a126c599f06b8c694ca15c9 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 14 Nov 2022 10:40:30 +0100 Subject: [PATCH 05/23] ref: deduplicate test creation of README in init --- skops/hub_utils/tests/test_hf_hub.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index ed42862d..4def319e 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -260,6 +260,8 @@ def test_init(classifier_pickle, config_json): ) _validate_folder(path=dir_path) + assert os.path.isfile(Path(dir_path) / "README.md") + # it should fail a second time since the folder is no longer empty. with pytest.raises(OSError, match="None-empty dst path already exists!"): init( @@ -271,23 +273,6 @@ def test_init(classifier_pickle, config_json): ) -def test_init_modelcard_creation(classifier_pickle, config_json): - # create a temp directory and delete it, we just need a unique name. 
- dir_path = tempfile.mkdtemp() - shutil.rmtree(dir_path) - - version = metadata.version("scikit-learn") - init( - model=classifier_pickle, - requirements=[f'scikit-learn="{version}"'], - dst=dir_path, - task="tabular-classification", - data=iris.data, - ) - _validate_folder(path=dir_path) - assert os.path.isfile(Path(dir_path) / "README.md") - - def test_override_init_modelcard(classifier_pickle, config_json): # create a temp directory and delete it, we just need a unique name. dir_path = tempfile.mkdtemp() @@ -438,7 +423,6 @@ def repo_path_for_inference(): @pytest.mark.network -@pytest.mark.inference @pytest.mark.skipif( IS_SKLEARN_DEV_BUILD, reason="Inference tests cannot run with sklearn dev build" ) @@ -458,8 +442,7 @@ def test_inference( repo_path_for_inference, destination_path, ): - # test inference backend for classifier and regressor models. This test can - # take a lot of time and be flaky. + # test inference backend for classifier and regressor models. client = HfApi() repo_path = repo_path_for_inference From 870797f70d1f7e2317ac10058687450bf8bede6d Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 14 Nov 2022 20:07:39 +0100 Subject: [PATCH 06/23] fix: check that content of new model card is modified --- skops/hub_utils/tests/test_hf_hub.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 4def319e..a9996e97 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -14,6 +14,7 @@ import sklearn from flaky import flaky from huggingface_hub import HfApi +from huggingface_hub.repocard import RepoCard from huggingface_hub.utils import RepositoryNotFoundError from sklearn.datasets import load_diabetes, load_iris from sklearn.linear_model import LinearRegression, LogisticRegression @@ -287,16 +288,21 @@ def test_override_init_modelcard(classifier_pickle, config_json): data=iris.data, ) 
_validate_folder(path=dir_path) - t0 = os.path.getmtime(Path(dir_path) / "README.md") - # override existent modelcard created by init + # inital card does not have a license set + with pytest.raises( + AttributeError, match="'CardData' object has no attribute 'license'" + ): + model_card = RepoCard.load(Path(dir_path) / "README.md") + model_card.data.license + + # override existent modelcard created by init with license attribute model = get_classifier() model_card = card.Card(model, metadata=card.metadata_from_config(Path(dir_path))) + model_card.metadata.license = "mit" model_card.save(Path(dir_path) / "README.md") - t1 = os.path.getmtime(Path(dir_path) / "README.md") - - # compare the times at which the files were last modified - assert t0 != t1 + new_card = RepoCard.load(Path(dir_path) / "README.md") + assert new_card.data.license == "mit" def test_init_no_warning_or_error(classifier_pickle, config_json): From f182ee1d10bb5ea1cce90e7adc860fa6c78d59b6 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Fri, 18 Nov 2022 10:25:20 +0100 Subject: [PATCH 07/23] revert lines removed by mistake --- skops/hub_utils/tests/test_hf_hub.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index a9996e97..636bf0dc 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -429,6 +429,7 @@ def repo_path_for_inference(): @pytest.mark.network +@pytest.mark.inference @pytest.mark.skipif( IS_SKLEARN_DEV_BUILD, reason="Inference tests cannot run with sklearn dev build" ) @@ -448,7 +449,8 @@ def test_inference( repo_path_for_inference, destination_path, ): - # test inference backend for classifier and regressor models. + # test inference backend for classifier and regressor models. This test can + # take a lot of time and be flaky. 
client = HfApi() repo_path = repo_path_for_inference From 0b6d3e26bb9bd05bb37c12583c232d5878007ee5 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Sat, 21 Jan 2023 16:59:24 +0100 Subject: [PATCH 08/23] fix: check model format of model file --- skops/hub_utils/_hf_hub.py | 14 ++++++++++---- skops/hub_utils/tests/test_hf_hub.py | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 39a5647d..8db283ed 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -11,6 +11,9 @@ import shutil from pathlib import Path from typing import Any, List, Literal, MutableMapping, Optional, Sequence, Union +from pickle import load as pikle_load +from skops import card, io + import numpy as np from huggingface_hub import HfApi, InferenceApi, snapshot_download @@ -409,10 +412,13 @@ def init( model_format=model_format, ) - with open(model, "rb") as f: - model = load(f) - model_card = card.Card(model, metadata=card.metadata_from_config(dst)) - model_card.save(dst / "README.md") + if model_format == 'pickle': + with open(model, "rb") as f: + model = pikle_load(f) + elif model_format == 'skops': + model = io.load(model) + model_card = card.Card(model, metadata=card.metadata_from_config(dst)) + model_card.save(dst / "README.md") except Exception: shutil.rmtree(dst) raise diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 7445e5a8..6b0aefc0 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -313,7 +313,7 @@ def test_override_init_modelcard(classifier, config_json): version = metadata.version("scikit-learn") init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=dir_path, task="tabular-classification", From 5e1494a571129747fd7360726396bdc33149a7d4 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 23 Jan 2023 22:50:46 +0100 Subject: [PATCH 
09/23] fix: run pre-commit on all files --- .github/workflows/PULL_REQUEST_TEMPLATE.md | 10 +++++----- skops/hub_utils/_hf_hub.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/PULL_REQUEST_TEMPLATE.md b/.github/workflows/PULL_REQUEST_TEMPLATE.md index d73ca321..7fb46c06 100644 --- a/.github/workflows/PULL_REQUEST_TEMPLATE.md +++ b/.github/workflows/PULL_REQUEST_TEMPLATE.md @@ -1,16 +1,16 @@ #### Reference Issues/PRs @@ -29,4 +29,4 @@ review, either the pull request needs some benchmarking, tinkering, convincing, etc. or more likely the reviewers are simply busy. Thanks for contributing! ---> \ No newline at end of file +--> diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 8db283ed..eed4f380 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -10,15 +10,15 @@ import os import shutil from pathlib import Path -from typing import Any, List, Literal, MutableMapping, Optional, Sequence, Union from pickle import load as pikle_load -from skops import card, io - +from typing import Any, List, Literal, MutableMapping, Optional, Sequence, Union import numpy as np from huggingface_hub import HfApi, InferenceApi, snapshot_download from sklearn.utils import check_array +from skops import card, io + SUPPORTED_TASKS = [ "tabular-classification", "tabular-regression", @@ -412,10 +412,10 @@ def init( model_format=model_format, ) - if model_format == 'pickle': + if model_format == "pickle": with open(model, "rb") as f: model = pikle_load(f) - elif model_format == 'skops': + elif model_format == "skops": model = io.load(model) model_card = card.Card(model, metadata=card.metadata_from_config(dst)) model_card.save(dst / "README.md") From 0c4a66fdc11025ba2a06fedfba4cb63a86144989 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Wed, 1 Feb 2023 00:45:14 +0100 Subject: [PATCH 10/23] fix: check for file suffix to determine format --- skops/hub_utils/_hf_hub.py | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index c0d5c972..5cb03345 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -431,10 +431,11 @@ def init( use_intelex=use_intelex, ) - if model_format == "pickle": + extension = Path(model_name).suffix + if extension in [".pkl", ".pickle", ".joblib"]: with open(model, "rb") as f: model = pikle_load(f) - elif model_format == "skops": + elif extension == ".skops": model = io.load(model) model_card = card.Card(model, metadata=card.metadata_from_config(dst)) model_card.save(dst / "README.md") From 217190829de366aa081f049829f2141633e3517b Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Sun, 12 Feb 2023 21:37:39 +0100 Subject: [PATCH 11/23] feat: implement model caching with sha256 hash --- skops/card/_model_card.py | 26 +++++++++++++++++++++++++- skops/card/tests/test_card.py | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index d5e0e55c..43d33f13 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -6,6 +6,8 @@ import zipfile from collections.abc import Mapping from dataclasses import dataclass, field +from functools import lru_cache, wraps +from hashlib import sha256 from pathlib import Path from reprlib import Repr from typing import Any, Iterator, Literal, Protocol, Sequence, Union @@ -297,7 +299,29 @@ def format(self) -> str: ... 
# pragma: no cover -def _load_model(model: Any, trusted=False) -> Any: +def hash_model(func): + @wraps(func) + def wrapped(*args, **kargs): + m = sha256() + if isinstance(args[0], (Path, str)): + with open(args[0], "rb") as f: + m.update(f.read()) + + args = list(args) + args.insert(0, m.hexdigest()) + wrapped._args_ = args + else: # the object is already loaded, no need to compute hash + args = list(args) + args.insert(0, 0) + wrapped._args_ = args + return func(*args, **kargs) + + return wrapped + + +@lru_cache +@hash_model +def _load_model(hash_model: Any, model: Any, trusted=False) -> Any: """Return a model instance. Loads the model if provided a file path, if already a model instance return diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 9ad94277..1bbddd0e 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -3,6 +3,7 @@ import re import tempfile import textwrap +from hashlib import sha256 from pathlib import Path import numpy as np @@ -22,6 +23,7 @@ PlotSection, TableSection, _load_model, + hash_model, ) from skops.io import dump @@ -43,6 +45,22 @@ def save_model_to_file(model_instance, suffix): return save_file_handle, save_file +@pytest.mark.parametrize("suffix", [".pkl"]) +def test_hash_model(suffix): + model0 = LinearRegression(n_jobs=123) + _, save_file = save_model_to_file(model0, suffix) + + @hash_model + def empty_fn(*args, **kargs): + return args + + _hash, filename = empty_fn(save_file) + m = sha256() + with open(save_file, "rb") as f: + m.update(f.read()) + assert _hash == m.hexdigest() + + @pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) def test_load_model(suffix): model0 = LinearRegression(n_jobs=123) From 36b855b13ccc8099cdc86551b01eaaa0f4673cce Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Sun, 12 Feb 2023 21:47:32 +0100 Subject: [PATCH 12/23] feat: extend test to test cache model loading --- skops/card/tests/test_card.py | 9 +++++++++ 1 file changed, 9 
insertions(+) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 1bbddd0e..ba2689d3 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -69,11 +69,20 @@ def test_load_model(suffix): save_file_path = Path(save_file) loaded_model_path = _load_model(save_file_path, trusted=True) loaded_model_instance = _load_model(model0, trusted=True) + _load_model(save_file, trusted=True) # extra call to test caching + _load_model(save_file, trusted=True) # extra call to test caching + cache_info = _load_model.cache_info() assert loaded_model_str.n_jobs == 123 assert loaded_model_path.n_jobs == 123 assert loaded_model_instance.n_jobs == 123 + assert cache_info.hits == 2 + assert cache_info.misses == 3 + assert cache_info.currsize == 3 + # clear cache for each test case [".pkl", ".pickle", ".skops"] + _load_model.cache_clear() + @pytest.fixture def model_card(model_diagram=True): From 81523ac4528c70a9a2a8aa62c9d1456ed575b24f Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Sun, 12 Feb 2023 21:52:55 +0100 Subject: [PATCH 13/23] add rest of suffixes in test_hash_model --- skops/card/tests/test_card.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index ba2689d3..cd02b98f 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -45,7 +45,7 @@ def save_model_to_file(model_instance, suffix): return save_file_handle, save_file -@pytest.mark.parametrize("suffix", [".pkl"]) +@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) def test_hash_model(suffix): model0 = LinearRegression(n_jobs=123) _, save_file = save_model_to_file(model0, suffix) From c8e928192e57e22a688653720af6dd569ea5789f Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Sun, 26 Feb 2023 17:17:43 +0100 Subject: [PATCH 14/23] fix: cache model within the model card object --- skops/card/_model_card.py | 49 
++++++++++++++--------------------- skops/card/tests/test_card.py | 41 +++++++++++------------------ 2 files changed, 35 insertions(+), 55 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 43d33f13..6639220f 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -6,7 +6,7 @@ import zipfile from collections.abc import Mapping from dataclasses import dataclass, field -from functools import lru_cache, wraps +from functools import cached_property from hashlib import sha256 from pathlib import Path from reprlib import Repr @@ -299,29 +299,7 @@ def format(self) -> str: ... # pragma: no cover -def hash_model(func): - @wraps(func) - def wrapped(*args, **kargs): - m = sha256() - if isinstance(args[0], (Path, str)): - with open(args[0], "rb") as f: - m.update(f.read()) - - args = list(args) - args.insert(0, m.hexdigest()) - wrapped._args_ = args - else: # the object is already loaded, no need to compute hash - args = list(args) - args.insert(0, 0) - wrapped._args_ = args - return func(*args, **kargs) - - return wrapped - - -@lru_cache -@hash_model -def _load_model(hash_model: Any, model: Any, trusted=False) -> Any: +def _load_model(model: Any, trusted=False) -> Any: """Return a model instance. Loads the model if provided a file path, if already a model instance return @@ -488,6 +466,7 @@ def __init__( self._data: dict[str, Section] = {} self._metrics: dict[str, str | float | int] = {} + self._model_hash = "" self._populate_template() @@ -515,19 +494,31 @@ def _populate_template(self): def get_model(self) -> Any: """Returns sklearn estimator object. - If the ``model`` is already loaded, return it as is. If the ``model`` attribute is a ``Path``/``str``, load the model and return it. - Returns ------- model : BaseEstimator The model instance. 
- """ + if isinstance(self.model, (str, Path)) and hasattr(self, "_model"): + hash_obj = sha256() + buf_size = 2 ** 20 # load in chunks to save memory + with open(self.model, "rb") as f: + for chunk in iter(lambda: f.read(buf_size), b""): + hash_obj.update(chunk) + model_hash = hash_obj.hexdigest() + + # if hash changed, invalidate cache by deleting attribute + if model_hash != self._model_hash: + del self._model + self._model_hash = model_hash + + return self._model + + @cached_property + def _model(self): model = _load_model(self.model, self.trusted) - # Ideally, we would only call the method below if we *know* that the - # model has changed, but at the moment we have no way of knowing that return model def add(self, **kwargs: str | Formattable) -> Card: diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index cd02b98f..92443695 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -23,7 +23,6 @@ PlotSection, TableSection, _load_model, - hash_model, ) from skops.io import dump @@ -45,22 +44,6 @@ def save_model_to_file(model_instance, suffix): return save_file_handle, save_file -@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) -def test_hash_model(suffix): - model0 = LinearRegression(n_jobs=123) - _, save_file = save_model_to_file(model0, suffix) - - @hash_model - def empty_fn(*args, **kargs): - return args - - _hash, filename = empty_fn(save_file) - m = sha256() - with open(save_file, "rb") as f: - m.update(f.read()) - assert _hash == m.hexdigest() - - @pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) def test_load_model(suffix): model0 = LinearRegression(n_jobs=123) @@ -69,20 +52,11 @@ def test_load_model(suffix): save_file_path = Path(save_file) loaded_model_path = _load_model(save_file_path, trusted=True) loaded_model_instance = _load_model(model0, trusted=True) - _load_model(save_file, trusted=True) # extra call to test caching - _load_model(save_file, trusted=True) # extra 
call to test caching - cache_info = _load_model.cache_info() assert loaded_model_str.n_jobs == 123 assert loaded_model_path.n_jobs == 123 assert loaded_model_instance.n_jobs == 123 - assert cache_info.hits == 2 - assert cache_info.misses == 3 - assert cache_info.currsize == 3 - # clear cache for each test case [".pkl", ".pickle", ".skops"] - _load_model.cache_clear() - @pytest.fixture def model_card(model_diagram=True): @@ -168,6 +142,21 @@ def test_save_model_card(destination_path, model_card): model_card.save(Path(destination_path) / "README.md") assert (Path(destination_path) / "README.md").exists() +def test_model_caching(skops_model_card_metadata_from_config, iris_skops_file, destination_path): + card = Card(iris_skops_file, metadata=metadata_from_config(destination_path)) + assert str(card._model_hash) == card.__dict__["_model_hash"] + iris_model_hash = card._model_hash + # update card with new model + new_model = LogisticRegression() + _, save_file = save_model_to_file(new_model, ".skops") + del card.model + card.model = save_file + card.get_model() # model gets cached + assert str(card._model_hash) == card.__dict__["_model_hash"] + logistic_reg_hash = card._model_hash + assert iris_model_hash != logistic_reg_hash + + CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore From 3c2151a0ad7735673a34e1c44ecec436bc2cc552 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Tue, 28 Feb 2023 23:22:50 +0100 Subject: [PATCH 15/23] fix: run pre-commit on all files --- skops/card/_model_card.py | 2 +- skops/card/tests/test_card.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 6639220f..f276f019 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -503,7 +503,7 @@ def get_model(self) -> Any: """ if isinstance(self.model, (str, Path)) and hasattr(self, "_model"): hash_obj = sha256() - buf_size = 2 ** 20 # load
in chunks to save memory + buf_size = 2**20 # load in chunks to save memory with open(self.model, "rb") as f: for chunk in iter(lambda: f.read(buf_size), b""): hash_obj.update(chunk) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 92443695..9a68be79 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -3,7 +3,6 @@ import re import tempfile import textwrap -from hashlib import sha256 from pathlib import Path import numpy as np @@ -142,21 +141,23 @@ def test_save_model_card(destination_path, model_card): model_card.save(Path(destination_path) / "README.md") assert (Path(destination_path) / "README.md").exists() -def test_model_caching(skops_model_card_metadata_from_config, iris_skops_file, destination_path): + +def test_model_caching( + skops_model_card_metadata_from_config, iris_skops_file, destination_path +): card = Card(iris_skops_file, metadata=metadata_from_config(destination_path)) assert str(card._model_hash) == card.__dict__["_model_hash"] iris_model_hash = card._model_hash - # update card with new model + # update card with new model new_model = LogisticRegression() _, save_file = save_model_to_file(new_model, ".skops") del card.model card.model = save_file - card.get_model() # model gets cached + card.get_model() # model gets cached assert str(card._model_hash) == card.__dict__["_model_hash"] logistic_reg_hash = card._model_hash assert iris_model_hash != logistic_reg_hash - - + CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore From 5af9b4ae982c606bd4fd073da5aa4a7716eac208 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Tue, 28 Feb 2023 23:28:54 +0100 Subject: [PATCH 16/23] ref: remove additional unrelated code --- skops/hub_utils/_hf_hub.py | 12 ---------- skops/hub_utils/tests/test_hf_hub.py | 34 ---------------------------- 2 files changed, 46 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 
5cb03345..39139a15 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -10,15 +10,12 @@ import os import shutil from pathlib import Path -from pickle import load as pikle_load from typing import Any, List, Literal, MutableMapping, Optional, Sequence, Union import numpy as np from huggingface_hub import HfApi, InferenceApi, snapshot_download from sklearn.utils import check_array -from skops import card, io - SUPPORTED_TASKS = [ "tabular-classification", "tabular-regression", @@ -430,15 +427,6 @@ def init( model_format=model_format, use_intelex=use_intelex, ) - - extension = Path(model_name).suffix - if extension in [".pkl", ".pickle", ".joblib"]: - with open(model, "rb") as f: - model = pikle_load(f) - elif extension == ".skops": - model = io.load(model) - model_card = card.Card(model, metadata=card.metadata_from_config(dst)) - model_card.save(dst / "README.md") except Exception: shutil.rmtree(dst) raise diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 519ccb44..d2ee3931 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -15,7 +15,6 @@ import sklearn from flaky import flaky from huggingface_hub import HfApi -from huggingface_hub.repocard import RepoCard from huggingface_hub.utils import RepositoryNotFoundError from sklearn.datasets import load_diabetes, load_iris from sklearn.linear_model import LinearRegression, LogisticRegression @@ -293,8 +292,6 @@ def test_init(classifier, config_json): ) _validate_folder(path=dir_path) - assert os.path.isfile(Path(dir_path) / "README.md") - # it should fail a second time since the folder is no longer empty. with pytest.raises(OSError, match="None-empty dst path already exists!"): init( @@ -306,37 +303,6 @@ def test_init(classifier, config_json): ) -def test_override_init_modelcard(classifier, config_json): - # create a temp directory and delete it, we just need a unique name. 
- dir_path = tempfile.mkdtemp() - shutil.rmtree(dir_path) - - version = metadata.version("scikit-learn") - init( - model=classifier, - requirements=[f'scikit-learn="{version}"'], - dst=dir_path, - task="tabular-classification", - data=iris.data, - ) - _validate_folder(path=dir_path) - - # inital card does not have a license set - with pytest.raises( - AttributeError, match="'CardData' object has no attribute 'license'" - ): - model_card = RepoCard.load(Path(dir_path) / "README.md") - model_card.data.license - - # override existent modelcard created by init with license attribute - model = get_classifier() - model_card = card.Card(model, metadata=card.metadata_from_config(Path(dir_path))) - model_card.metadata.license = "mit" - model_card.save(Path(dir_path) / "README.md") - new_card = RepoCard.load(Path(dir_path) / "README.md") - assert new_card.data.license == "mit" - - def test_init_no_warning_or_error(classifier, config_json): config_path, file_format = config_json # for the happy path, there should be no warning From 1245ff32db6e4030ce4a6fc74717835931ee0b2d Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 13 Mar 2023 01:17:17 +0100 Subject: [PATCH 17/23] run precommit on all files and apply fixes --- docs/examples.rst | 8 ++++---- examples/plot_custom_model_card.py | 4 +++- examples/plot_model_card.py | 6 ++++-- examples/plot_tabular_regression.py | 4 ++-- skops/hub_utils/_hf_hub.py | 1 + 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index 75dc5097..570f1f2a 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -4,16 +4,16 @@ Examples of interactions with the Hugging Face Hub ================================================== - Creating the Model Card: - :ref:`sphx_glr_auto_examples_plot_model_card.py` is an example of using + :ref:`sphx_glr_auto_examples_plot_model_card.py` is an example of using skops to create a model card that can be used on the Hugging Face Hub. 
- Putting the Model Card on the Hub: - :ref:`sphx_glr_auto_examples_plot_hf_hub.py` is an example of using skops + :ref:`sphx_glr_auto_examples_plot_hf_hub.py` is an example of using skops to put a model card on the Hugging Face Hub. - Tabular Regression: - :ref:`sphx_glr_auto_examples_plot_tabular_regression.py` is an example of using skops to serialize a tabular + :ref:`sphx_glr_auto_examples_plot_tabular_regression.py` is an example of using skops to serialize a tabular regression model and create a model card and a Hugging Face Hub repository. - Text Classification: - :ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text + :ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text classification model and create a model card and a Hugging Face Hub repository. - Using Intel(R) Extension for scikit-learn: :ref:`sphx_glr_auto_examples_plot_intelex.py` is an example of using diff --git a/examples/plot_custom_model_card.py b/examples/plot_custom_model_card.py index 0033950b..1385836f 100644 --- a/examples/plot_custom_model_card.py +++ b/examples/plot_custom_model_card.py @@ -163,7 +163,9 @@ display.figure_.savefig(plot_file_name) model_card.add_plot( **{ - "Regression on California Housing dataset/Results/Partial Dependence Plots": plot_file_name + "Regression on California Housing dataset/Results/Partial Dependence Plots": ( + plot_file_name + ) }, ) diff --git a/examples/plot_model_card.py b/examples/plot_model_card.py index 5e68b7e0..278a6f6d 100644 --- a/examples/plot_model_card.py +++ b/examples/plot_model_card.py @@ -158,7 +158,7 @@ model_card.add_permutation_importances( importances, X_test.columns, - plot_file=Path(local_repo) / "importance.png", + plot_file=local_repo + "/importance.png", plot_name="Permutation Importance", ) @@ -174,7 +174,9 @@ model_card.add_table( folded=True, **{ - "Model description/Evaluation Results/Hyperparameter search results": 
cv_results, + "Model description/Evaluation Results/Hyperparameter search results": ( + cv_results + ), "Model description/Evaluation Results/Classification report": clf_report, }, ) diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py index 5e9d8fb0..83a5ed48 100644 --- a/examples/plot_tabular_regression.py +++ b/examples/plot_tabular_regression.py @@ -16,7 +16,6 @@ from tempfile import mkdtemp, mkstemp import matplotlib.pyplot as plt -import pandas as pd import sklearn from sklearn.datasets import load_diabetes from sklearn.linear_model import LinearRegression @@ -42,7 +41,8 @@ # Train a Model # ============= # To train a model, we need to convert our data first to vectors. We will use -# StandardScalar in our pipeline. We will fit a Linear Regression model with the outputs of the scalar. +# StandardScalar in our pipeline. We will fit a Linear Regression +# model with the outputs of the scalar. model = Pipeline( [ ("scaler", StandardScaler()), diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 39139a15..efbd5c25 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -264,6 +264,7 @@ def _create_config( does not support it. For more info, see https://intel.github.io/scikit-learn-intelex/. 
""" + # so that we don't have to explicitly add keys and they're added as a # dictionary if they are not found # see: https://stackoverflow.com/a/13151294/2536294 From ff6d3b9f5c818e8ec9fe5aa817e9d40b393478c2 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Wed, 3 May 2023 22:44:47 +0200 Subject: [PATCH 18/23] fix test_model_caching with a higher level test --- skops/card/tests/test_card.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 8c70c867..f9d6a49c 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -4,6 +4,7 @@ import tempfile import textwrap from pathlib import Path +from unittest import mock import numpy as np import pytest @@ -147,18 +148,23 @@ def test_save_model_card(destination_path, model_card): def test_model_caching( skops_model_card_metadata_from_config, iris_skops_file, destination_path ): + # _load_model get called card = Card(iris_skops_file, metadata=metadata_from_config(destination_path)) - assert str(card._model_hash) == card.__dict__["_model_hash"] - iris_model_hash = card._model_hash - # update card with new model - new_model = LogisticRegression() - _, save_file = save_model_to_file(new_model, ".skops") - del card.model - card.model = save_file - card.get_model() # model gets cached - assert str(card._model_hash) == card.__dict__["_model_hash"] - logistic_reg_hash = card._model_hash - assert iris_model_hash != logistic_reg_hash + with mock.patch("skops.card._model_card._load_model") as mock_load_model: + model1 = card.get_model() + model2 = card.get_model() + assert model1 is model2 + # model is cached, hence _load_model is not called + mock_load_model.assert_not_called() + # update card with new model + new_model = LogisticRegression() + _, save_file = save_model_to_file(new_model, ".skops") + del card.model + card.model = save_file + model3 = card.get_model() # model gets cached + 
model4 = card.get_model() + assert model3 is model4 + assert mock_load_model.call_count == 1 CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore From dd2eb68435877d36d6e0083bf2b18a87a488e6b6 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Wed, 3 May 2023 22:55:31 +0200 Subject: [PATCH 19/23] revert changes to origin --- examples/plot_model_card.py | 2 +- examples/plot_tabular_regression.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/plot_model_card.py b/examples/plot_model_card.py index d40c6be4..ca6c57a6 100644 --- a/examples/plot_model_card.py +++ b/examples/plot_model_card.py @@ -158,7 +158,7 @@ model_card.add_permutation_importances( importances, X_test.columns, - plot_file=local_repo + "/importance.png", + plot_file=Path(local_repo) / "importance.png", plot_name="Permutation Importance", ) diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py index 83a5ed48..5e9d8fb0 100644 --- a/examples/plot_tabular_regression.py +++ b/examples/plot_tabular_regression.py @@ -16,6 +16,7 @@ from tempfile import mkdtemp, mkstemp import matplotlib.pyplot as plt +import pandas as pd import sklearn from sklearn.datasets import load_diabetes from sklearn.linear_model import LinearRegression @@ -41,8 +42,7 @@ # Train a Model # ============= # To train a model, we need to convert our data first to vectors. We will use -# StandardScalar in our pipeline. We will fit a Linear Regression -# model with the outputs of the scalar. +# StandardScalar in our pipeline. We will fit a Linear Regression model with the outputs of the scalar. 
model = Pipeline( [ ("scaler", StandardScaler()), From 7826c4ba866a3a4682030a8074787913a58dbae6 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Wed, 3 May 2023 22:59:34 +0200 Subject: [PATCH 20/23] revert changes to origin --- examples/plot_custom_model_card.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/plot_custom_model_card.py b/examples/plot_custom_model_card.py index 1385836f..0033950b 100644 --- a/examples/plot_custom_model_card.py +++ b/examples/plot_custom_model_card.py @@ -163,9 +163,7 @@ display.figure_.savefig(plot_file_name) model_card.add_plot( **{ - "Regression on California Housing dataset/Results/Partial Dependence Plots": ( - plot_file_name - ) + "Regression on California Housing dataset/Results/Partial Dependence Plots": plot_file_name }, ) From 1e1237c980488d59784fea9bd205e59e191f0e57 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 8 May 2023 09:04:03 +0200 Subject: [PATCH 21/23] apply and test suggestion --- skops/card/tests/test_card.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 6b48406d..32f15aab 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -26,7 +26,7 @@ TableSection, _load_model, ) -from skops.io import dump +from skops.io import dump, load from skops.utils.importutils import import_or_raise @@ -149,23 +149,29 @@ def test_save_model_card(destination_path, model_card): def test_model_caching( skops_model_card_metadata_from_config, iris_skops_file, destination_path ): - # _load_model get called + """Tests that the model card caches the model to avoid loading it multiple times""" + + new_model = LogisticRegression(random_state=4321) + # mock _load_model, it still loads the model but we can track call count + mock_load_model = mock.Mock(side_effect=load) card = Card(iris_skops_file, metadata=metadata_from_config(destination_path)) 
- with mock.patch("skops.card._model_card._load_model") as mock_load_model: + with mock.patch("skops.card._model_card._load_model", mock_load_model): model1 = card.get_model() model2 = card.get_model() assert model1 is model2 # model is cached, hence _load_model is not called mock_load_model.assert_not_called() - # update card with new model - new_model = LogisticRegression() - _, save_file = save_model_to_file(new_model, ".skops") - del card.model - card.model = save_file - model3 = card.get_model() # model gets cached + + # override model with new model + dump(new_model, card.model) + + model3 = card.get_model() + assert mock_load_model.call_count == 1 + assert model3.random_state == 4321 model4 = card.get_model() + assert model3 is model4 - assert mock_load_model.call_count == 1 + assert mock_load_model.call_count == 1 # cached call CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore From 1879a3ef134d74826896be016b41f25659fde492 Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 8 May 2023 20:52:43 +0200 Subject: [PATCH 22/23] revert lines --- skops/card/_model_card.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index c2c44e00..2de2ef5f 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -557,8 +557,10 @@ def _populate_template(self, model_diagram: bool | Literal["auto"] | str): def get_model(self) -> Any: """Returns sklearn estimator object. + If the ``model`` is already loaded, return it as is. If the ``model`` attribute is a ``Path``/``str``, load the model and return it. 
+ Returns ------- model : BaseEstimator From 78207d91153bf9c2c7a05bef76772cf63a55ba8d Mon Sep 17 00:00:00 2001 From: Juan Camacho Mohedano Date: Mon, 8 May 2023 20:55:35 +0200 Subject: [PATCH 23/23] revert lines --- skops/card/_model_card.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 2de2ef5f..754c03f2 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -565,6 +565,7 @@ def get_model(self) -> Any: ------- model : BaseEstimator The model instance. + """ if isinstance(self.model, (str, Path)) and hasattr(self, "_model"): hash_obj = sha256()