From 8510d9e1f51c4b3ab370bddff99c744c8877c52f Mon Sep 17 00:00:00 2001 From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com> Date: Wed, 5 Oct 2022 20:22:08 +0530 Subject: [PATCH 1/6] load model in Card class --- skops/card/_model_card.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index c978e670..76647ae4 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -254,7 +254,7 @@ def __init__( model_diagram: bool = True, metadata: Optional[CardData] = None, ) -> None: - self.model = model + self.model = self._load_model(model) self.model_diagram = model_diagram self._eval_results = {} # type: ignore self._template_sections: dict[str, str] = {} @@ -373,6 +373,26 @@ def add_metrics(self, **kwargs: str) -> "Card": self._eval_results[metric] = value return self + def _load_model(model: Any) -> Any: + """Loads the model if provided a file path. + + Parameters + ---------- + model : Any + Str or model instance. + + Returns + ------- + model : object + Model instance. + """ + if isinstance(model, str): + model_path = Path(model) + if not model_path.exists(): + raise ValueError("Model file does not exist") + model = skops.io.load(model) + return model + def _generate_card(self) -> ModelCard: """Generate the ModelCard object From 4367c134a90c6c5ac04b3ac94c55074d942b694a Mon Sep 17 00:00:00 2001 From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com> Date: Wed, 5 Oct 2022 21:23:54 +0530 Subject: [PATCH 2/6] doc failing resolution --- skops/card/_model_card.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 76647ae4..47ffbeba 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -373,7 +373,7 @@ def add_metrics(self, **kwargs: str) -> "Card": self._eval_results[metric] = value return self - def _load_model(model: Any) -> Any: + def _load_model(self, model: Any) -> Any: """Loads the model if provided a file path. Parameters From 8f919a04febd96cdf4dc0f0f3b3d6744258fcf9b Mon Sep 17 00:00:00 2001 From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com> Date: Thu, 6 Oct 2022 18:18:43 +0530 Subject: [PATCH 3/6] support for pkl model file --- skops/card/_model_card.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 47ffbeba..3a9e111f 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -5,6 +5,7 @@ import re import shutil import tempfile +import joblib from dataclasses import dataclass from pathlib import Path from reprlib import Repr @@ -390,7 +391,12 @@ def _load_model(self, model: Any) -> Any: model_path = Path(model) if not model_path.exists(): raise ValueError("Model file does not exist") - model = skops.io.load(model) + if model_path.suffix==".pkl": + model = joblib.load(model_path) + elif model_path.suffix==".skops": + model = skops.io.load(model_path) + else: + raise ValueError("Model Format not supported") return model def _generate_card(self) -> ModelCard: From da8d30260dc619d97d1ba35faf796259e54aa870 Mon Sep 17 00:00:00 2001 From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com> Date: Thu, 20 Oct 2022 21:40:29 +0530 Subject: [PATCH 4/6] test addtion for card class --- skops/card/_model_card.py | 112 +++++++++++++++++++++++++--------- skops/card/tests/test_card.py | 26 ++++---- 2 files changed, 97 insertions(+), 41 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 3a9e111f..2d0d64ee 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -16,6 +16,7 @@ from tabulate import tabulate # type: ignore import skops +from skops.io import load # Repr attributes can be used to control the behavior of repr aRepr = Repr() @@ -26,7 +27,10 @@ def wrap_as_details(text: str, folded: bool) -> str: if not folded: return text - return f"
\n Click to expand \n\n{text}\n\n
" + return ( + "
\n Click to expand" + f" \n\n{text}\n\n
" + ) def _clean_table(table: str) -> str: @@ -93,7 +97,9 @@ def format(self) -> str: headers = self.table.keys() table = _clean_table( - tabulate(self.table, tablefmt="github", headers=headers, showindex=False) + tabulate( + self.table, tablefmt="github", headers=headers, showindex=False + ) ) return wrap_as_details(table, folded=self.folded) @@ -151,7 +157,9 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData: task = config.get("sklearn", {}).get("task", None) if task: card_data.tags += [task] - card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file") + card_data.model_file = ( + config.get("sklearn", {}).get("model", {}).get("file") + ) example_input = config.get("sklearn", {}).get("example_input", None) # Documentation on what the widget expects: # https://huggingface.co/docs/hub/models-widgets-examples @@ -173,7 +181,7 @@ class Card: Parameters ---------- - model: estimator object + model: pathlib.path, str, or sklearn estimator object Model that will be documented. model_diagram: bool, default=True @@ -255,7 +263,7 @@ def __init__( model_diagram: bool = True, metadata: Optional[CardData] = None, ) -> None: - self.model = self._load_model(model) + self._model = model self.model_diagram = model_diagram self._eval_results = {} # type: ignore self._template_sections: dict[str, str] = {} @@ -303,11 +311,15 @@ def add_plot(self, folded=False, **kwargs: str) -> "Card": Card object. """ for plot_name, plot_path in kwargs.items(): - section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) + section = PlotSection( + alt_text=plot_name, path=plot_path, folded=folded + ) self._extra_sections.append((plot_name, section)) return self - def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card: + def add_table( + self, folded: bool = False, **kwargs: dict["str", list[Any]] + ) -> Card: """Add a table to the model card. Add a table to the model card. This can be especially useful when you @@ -374,29 +386,54 @@ def add_metrics(self, **kwargs: str) -> "Card": self._eval_results[metric] = value return self + @property + def model(self): + model = self._load_model(self._model) + if model is not self._model: + self._model = model + return model + + @model.setter + def model(self, model): + self._model = model + + @model.deleter + def model(self): + del self._model + def _load_model(self, model: Any) -> Any: - """Loads the model if provided a file path. + """Loads the model if provided a file path, if already a model instance, + return it unmodified. Parameters ---------- - model : Any - Str or model instance. + model : pathlib.path, str, or sklearn estimator + Path/str or the actual model instance. If a Path or str, loads the model on first call. Returns ------- - model : object + model : object Model instance. + """ - if isinstance(model, str): - model_path = Path(model) - if not model_path.exists(): - raise ValueError("Model file does not exist") - if model_path.suffix==".pkl": - model = joblib.load(model_path) - elif model_path.suffix==".skops": - model = skops.io.load(model_path) - else: - raise ValueError("Model Format not supported") + if not isinstance(model, (Path, str)): + return model + + model_path = Path(model) + if not model_path.exists(): + raise ValueError("Model file does not exist") + + if model_path.suffix in (".pkl", ".pickle"): + model = joblib.load(model_path) + elif model_path.suffix == ".skops": + model = load(model_path) + else: + msg = ( + f"Cannot interpret model suffix {model_path.suffix}, should be" + " '.pkl', '.pickle' or '.skops'" + ) + raise ValueError(msg) + return model def _generate_card(self) -> ModelCard: @@ -429,18 +466,23 @@ def _generate_card(self) -> ModelCard: ) else: template_sections["get_started_code"] = ( - "import joblib\nimport json\nimport pandas as pd\nclf =" - f' joblib.load({model_file})\nwith open("config.json") as' + "import joblib\nimport json\nimport pandas as" + " pd\nclf =" + f" joblib.load({model_file})\nwith" + ' open("config.json") as' " f:\n " " config =" " json.load(f)\n" 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' ) if self.model_diagram is True: - model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model))) + model_plot_div = re.sub( + r"\n\s+", "", str(estimator_html_repr(self.model)) + ) if model_plot_div.count("sk-top-container") == 1: model_plot_div = model_plot_div.replace( - "sk-top-container", 'sk-top-container" style="overflow: auto;' + "sk-top-container", + 'sk-top-container" style="overflow: auto;', ) model_plot: str | None = model_plot_div else: @@ -465,7 +507,9 @@ def _generate_card(self) -> ModelCard: f"{tmpdirname}/temporary_template.md", ) # create a temporary template with the additional plots - template_sections["template_path"] = f"{tmpdirname}/temporary_template.md" + template_sections[ + "template_path" + ] = f"{tmpdirname}/temporary_template.md" # add extra sections at the end of the template with open(template_sections["template_path"], "a") as template: if self._extra_sections: @@ -546,13 +590,17 @@ def __repr__(self) -> str: model = getattr(self, "model", None) if model: model_str = self._strip_blank(repr(model)) - model_repr = aRepr.repr(f" model={model_str},").strip('"').strip("'") + model_repr = ( + aRepr.repr(f" model={model_str},").strip('"').strip("'") + ) else: model_repr = None # metadata metadata_reprs = [] - for key, val in self.metadata.to_dict().items() if self.metadata else {}: + for key, val in ( + self.metadata.to_dict().items() if self.metadata else {} + ): if key == "widget": metadata_reprs.append(" metadata.widget={...},") continue @@ -566,14 +614,18 @@ def __repr__(self) -> str: template_reprs = [] for key, val in self._template_sections.items(): val = self._strip_blank(repr(val)) - template_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'")) + template_reprs.append( + aRepr.repr(f" {key}={val},").strip('"').strip("'") + ) template_repr = "\n".join(template_reprs) # figures figure_reprs = [] for key, val in self._extra_sections: val = self._strip_blank(repr(val)) - figure_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'")) + figure_reprs.append( + aRepr.repr(f" {key}={val},").strip('"').strip("'") + ) figure_repr = "\n".join(figure_reprs) complete_repr = "Card(\n" diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index e2ed4596..f5f2fb4e 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -77,7 +77,9 @@ def _create_model_card_from_saved_model( task="tabular-classification", data=X, ) - card = Card(iris_estimator, metadata=metadata_from_config(destination_path)) + card = Card( + iris_estimator, metadata=metadata_from_config(destination_path) + ) card.save(Path(destination_path) / "README.md") return card @@ -206,7 +208,9 @@ def test_add_metrics(destination_path, model_card): assert ("acc" in card) and ("f1" in card) and ("0.1" in card) -def test_code_autogeneration(destination_path, pkl_model_card_metadata_from_config): +def test_code_autogeneration( + destination_path, pkl_model_card_metadata_from_config +): # test if getting started code is automatically generated metadata = metadata_load(local_path=Path(destination_path) / "README.md") filename = metadata["model_file"] @@ -288,14 +292,12 @@ def test_very_long_lines_are_shortened(self, card: Card, meth): card.add(my_section="very long line " * 100) result = meth(card) expected = ( - "Card(\n model=LinearRegression(fit_intercept=False),\n" - " model_description='A description',\n model_card_authors='Jane Doe',\n" - " my_section='very long line very lon...line very long line very long line" - " ',\n" - " roc_curve='ROC_curve.png',\n" - " confusion_matrix='confusion_matrix.jpg',\n" - " search_results=Table(3x2),\n" - ")" + "Card(\n model=LinearRegression(fit_intercept=False),\n " + " model_description='A description',\n model_card_authors='Jane" + " Doe',\n my_section='very long line very lon...line very long" + " line very long line ',\n roc_curve='ROC_curve.png',\n " + " confusion_matrix='confusion_matrix.jpg',\n " + " search_results=Table(3x2),\n)" ) assert result == expected @@ -412,7 +414,9 @@ def test_format_path_is_str(self): assert section.format() == expected def test_format_path_is_pathlib(self): - section = PlotSection(alt_text="some title", path=Path("path") / "plot.png") + section = PlotSection( + alt_text="some title", path=Path("path") / "plot.png" + ) expected = f"![some title](path{os.path.sep}plot.png)" assert section.format() == expected From 4051cff18518a1a78d89995674c834f4a3ec44f3 Mon Sep 17 00:00:00 2001 From: know-one-1 Date: Tue, 25 Oct 2022 17:24:27 +0530 Subject: [PATCH 5/6] Revert "test addtion for card class" This reverts commit d6c98569ae4f6c3263501d9dda3714cfdbfb116c. --- skops/card/tests/test_card.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index f5f2fb4e..4c255ce7 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -211,6 +211,7 @@ def test_add_metrics(destination_path, model_card): def test_code_autogeneration( destination_path, pkl_model_card_metadata_from_config ): + # test if getting started code is automatically generated metadata = metadata_load(local_path=Path(destination_path) / "README.md") filename = metadata["model_file"] From 3c93add5987347746cf68894184eafa5d8d0fe66 Mon Sep 17 00:00:00 2001 From: know-one-1 Date: Tue, 25 Oct 2022 22:29:32 +0530 Subject: [PATCH 6/6] load model from path functionality and test addition --- skops/card/tests/test_card.py | 398 +++++++++++++++++++++++++++++++++- 1 file changed, 390 insertions(+), 8 deletions(-) diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 4c255ce7..a92ebe47 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -33,6 +33,19 @@ def model_card(model_diagram=True): yield card +@pytest.fixture +def model_card_from_path(suffix, model_diagram=True): + model = fit_model() + save_file = tempfile.mkstemp(suffix=suffix, prefix="skops-test")[1] + if suffix in (".pkl", ".pickle"): + with open(save_file, "wb") as f: + pickle.dump(model, f) + elif suffix == ".skops": + dump(model, save_file) + card = Card(save_file, model_diagram) + yield card + + @pytest.fixture def iris_data(): X, y = load_iris(return_X_y=True, as_frame=True) @@ -77,9 +90,25 @@ def _create_model_card_from_saved_model( task="tabular-classification", data=X, ) - card = Card( - iris_estimator, metadata=metadata_from_config(destination_path) + card = Card(iris_estimator, metadata=metadata_from_config(destination_path)) + card.save(Path(destination_path) / "README.md") + return card + + +def _create_model_card_from_model_path( + destination_path, + iris_data, + save_file, +): + X, y = iris_data + hub_utils.init( + model=save_file, + requirements=[f"scikit-learn=={sklearn.__version__}"], + dst=destination_path, + task="tabular-classification", + data=X, ) + card = Card(save_file, metadata=metadata_from_config(destination_path)) card.save(Path(destination_path) / "README.md") return card @@ -102,6 +131,22 @@ def pkl_model_card_metadata_from_config( ) +@pytest.fixture +def skops_model_card_from_path_metadata_from_config( + destination_path, iris_skops_file, iris_data +): + yield _create_model_card_from_model_path( + destination_path, iris_data, iris_skops_file + ) + + +@pytest.fixture +def pkl_model_card_from_path_metadata_from_config( + destination_path, iris_pkl_file, iris_data +): + yield _create_model_card_from_model_path(destination_path, iris_data, iris_pkl_file) + + @pytest.fixture def destination_path(): with tempfile.TemporaryDirectory(prefix="skops-test") as dir_path: @@ -113,11 +158,23 @@ def test_save_model_card(destination_path, model_card): assert (Path(destination_path) / "README.md").exists() +@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) +def test_save_model_card_from_path(destination_path, model_card_from_path): + model_card_from_path.save(Path(destination_path) / "README.md") + assert (Path(destination_path) / "README.md").exists() + + def test_hyperparameter_table(destination_path, model_card): model_card = model_card.render() assert "fit_intercept" in model_card +@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"]) +def test_hyperparameter_table_from_path(model_card_from_path): + model_card_from_path = model_card_from_path.render() + assert "fit_intercept" in model_card_from_path + + def _strip_multiple_chars(text, char): # _strip_multiple_chars("hi there") == "hi there" # _strip_multiple_chars("|---|--|", "-") == "|-|-|" @@ -146,17 +203,44 @@ def test_plot_model(destination_path, model_card): assert "