From faf9e3734ae700886e9b9ac71025e559ae77af13 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Tue, 25 Oct 2022 16:47:20 +0200 Subject: [PATCH 01/26] [skip ci] [WIP] Alternative Card implementation This is a suggestion to address the issues discussed on #72 Description The proposed model card implementation would allow sections to be dynamically added or overwritten. This is not a complete implementation but already covers most of the features we already have and then some. On top of these features, it would be possible to add more features like creating a default Card with placeholders, just like the existing template, or the possibility to delete existing sections or to retrieve the result of a certain section. Implementation The underlying data structure consists of a dict and a Section dataclass. All data is stored in a _data attribute with the type dict[str, Section]. The dataclass holds the section contents, i.e. the section title, the section content, and subsections, which again have the same type. It's thus a recursive data structure. Section title and dict key are identical, which is mostly for convenience. With this refactor, there are no separate data containers anymore for eval results, template sections, extra sections, etc. They are all treated the same. IMHO, this greatly simplifies the code overall. The only complex function that's left is the one needed to traverse the tree holding the data, and even that is just 14 LOC. Demo To see how the new class can be used, take a look at the main function. 
The resulting Card can be seen here: https://huggingface.co/skops-ci/hf_hub_example-fcc0d6fe-d072-4f94-8fdb-6bf3bb917bca --- skops/card/_card_alternative.py | 361 ++++++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 skops/card/_card_alternative.py diff --git a/skops/card/_card_alternative.py b/skops/card/_card_alternative.py new file mode 100644 index 00000000..656ae18c --- /dev/null +++ b/skops/card/_card_alternative.py @@ -0,0 +1,361 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from pathlib import Path +from reprlib import Repr +from typing import Any, Iterator, Protocol + +from huggingface_hub import CardData +from sklearn.utils import estimator_html_repr +from tabulate import tabulate # type: ignore + +from skops.card._model_card import PlotSection, TableSection + +aRepr = Repr() +aRepr.maxother = 79 +aRepr.maxstring = 79 + + +def split_subsection_names(key: str) -> list[str]: + return key.split("/") + + +def _clean_table(table: str) -> str: + # replace line breaks "\n" with html tag
, however, leave end-of-line + # line breaks (eol_lb) intact + eol_lb = "|\n" + placeholder = "$%!?" # arbitrary sting that never appears naturally + table = ( + table.replace(eol_lb, placeholder) + .replace("\n", "
") + .replace(placeholder, eol_lb) + ) + return table + + +@dataclass +class Section: + title: str + content: Formattable | str | None = None + subsections: dict[str, Section] = field(default_factory=dict) + + +class Formattable(Protocol): + def format(self) -> str: + ... + + +class Card: + @classmethod + def make_default( + cls, model, model_diagram: bool = True, metadata: CardData | None = None + ): + """Add a bunch of default sections, yet to be implemented""" + raise NotImplementedError + + def __init__( + self, model, model_diagram: bool = True, metadata: CardData | None = None + ): + self.model = model + self.model_diagram = model_diagram + self.metadata = metadata or CardData() + + self._data: dict[str, Section] = {} + self._metrics: dict[str, float | int] = {} + self._reset() + + def _reset(self) -> None: + self._add_model(self.model) + + model_file = self.metadata.to_dict().get("model_file") + if model_file: + self._add_get_started_code(model_file) + + self._add_model_section() + self._add_hyperparams() + + def add(self, **kwargs: str) -> "Card": + for key, val in kwargs.items(): + self._add_single(key, val) + return self + + def _add_single(self, key: str, val: Formattable | str) -> None: + section = self._data + *subsection_names, leaf_node_name = split_subsection_names(key) + for subsection_name in subsection_names: + section_maybe = section.get(subsection_name) + + # there are already subsections + if section_maybe is not None: + section = section_maybe.subsections + continue + + # no subsection, create + entry = Section(title=subsection_name) + section[subsection_name] = entry + section = entry.subsections + + if leaf_node_name in section: + # entry exists, only overwrite content + section[leaf_node_name].content = val + else: + # entry does not exist, create a new one + section[leaf_node_name] = Section(title=leaf_node_name, content=val) + + def _add_model(self, model) -> None: + model = getattr(self, "model", None) + if model is None: + return + + 
model_str = self._strip_blank(repr(model)) + model_repr = aRepr.repr(f"model={model_str},").strip('"').strip("'") + self._add_single("Model description", model_repr) + + def _add_model_section(self) -> None: + if not self.model_diagram: + return + + model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model))) + if model_plot_div.count("sk-top-container") == 1: + model_plot_div = model_plot_div.replace( + "sk-top-container", 'sk-top-container" style="overflow: auto;' + ) + self._add_single("Model Plot", model_plot_div) + + def _add_hyperparams(self) -> None: + hyperparameter_dict = self.model.get_params(deep=True) + table = _clean_table( + tabulate( + list(hyperparameter_dict.items()), + headers=["Hyperparameter", "Value"], + tablefmt="github", + ) + ) + self._add_single("Model description/Training Procedure/Hyperparameters", table) + + def add_plot(self, folded=False, **kwargs: str) -> "Card": + for plot_name, plot_path in kwargs.items(): + section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) + self._add_single(plot_name, section) + return self + + def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card: + for key, val in kwargs.items(): + section = TableSection(table=val, folded=folded) + self._add_single(key, section) + return self + + def add_metrics(self, **kwargs: int | float) -> "Card": + self._metrics.update(kwargs) + self._add_metrics(self._metrics) + return self + + def _add_metrics(self, metrics: dict[str, float | int]) -> None: + table = tabulate( + list(metrics.items()), + headers=["Metric", "Value"], + tablefmt="github", + ) + self._add_single("Model description/Evaluation Results", table) + + def _generate_metadata(self, metadata: CardData) -> Iterator[str]: + for key, val in metadata.to_dict().items() if metadata else {}: + if key == "widget": + yield "metadata.widget={...}," + continue + + yield aRepr.repr(f"metadata.{key}={val},").strip('"').strip("'") + + @staticmethod + def 
_strip_blank(text) -> str: + # remove new lines and multiple spaces + text = text.replace("\n", " ") + text = re.sub(r"\s+", r" ", text) + return text + + def _generate_content(self, data, depth: int = 1) -> Iterator[str]: + for val in data.values(): + title = f"{depth * '#'} {val.title}" + yield title + + if isinstance(val.content, str): + yield val.content + elif val.content is not None: # is Formattable + yield val.content.format() + + if val.subsections: + yield from self._generate_content(val.subsections, depth=depth + 1) + + def __str__(self) -> str: + return self.__repr__() + + def __repr__(self) -> str: + metadata_repr = "\n".join( + " " + line for line in self._generate_metadata(self.metadata) + ) + content_repr = "\n\n".join( + " " + line for line in self._generate_content(self._data) + ) + + complete_repr = "Card(\n" + if metadata_repr: + complete_repr += metadata_repr + "\n" + if content_repr: + complete_repr += content_repr + "\n" + complete_repr += ")" + return complete_repr + + def _add_get_started_code(self, file_name: str, indent: str = " ") -> None: + lines = [ + "import json", + "import pandas as pd", + ] + if file_name.endswith(".skops"): + lines += [ + "from skops.io import load", + f'model = load("{file_name}")', + ] + else: # pickle + lines += [ + "import pickle", + f"with open('{file_name}') as f:", + indent + "model = pickle.load(f)", + ] + + lines += [ + 'with open("config.json") as f:', + indent + "config = json.load(f)", + 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))', + ] + self._add_single("How to Get Started with the Model", "\n".join(lines)) + + def _generate_card(self) -> Iterator[str]: + if self.metadata: + yield f"---\n{self.metadata.to_yaml()}\n---" + + for line in self._generate_content(self._data): + yield "\n" + line + + def save(self, path: str | Path) -> None: + """Save the model card. + + This method renders the model card in markdown format and then saves it + as the specified file. 
+ + Parameters + ---------- + path: str, or Path + Filepath to save your card. + + Notes + ----- + The keys in model card metadata can be seen `here + `__. + """ + with open(path, "w") as f: + f.write("\n".join(self._generate_card())) + + def render(self) -> str: + """Render the final model card as a string. + + Returns + ------- + card : str + The rendered model card with all placeholders filled and all extra + sections inserted. + """ + return "\n".join(self._generate_card()) + + +def main(): + import os + import pickle + import tempfile + from uuid import uuid4 + + import matplotlib.pyplot as plt + import sklearn + from huggingface_hub import HfApi + from sklearn.datasets import load_iris + from sklearn.linear_model import LogisticRegression + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler + + from skops import hub_utils + from skops.card import metadata_from_config + + X, y = load_iris(return_X_y=True, as_frame=True) + + model = Pipeline( + [("scaler", StandardScaler()), ("clf", LogisticRegression(random_state=123))] + ).fit(X, y) + + pkl_file = tempfile.mkstemp(suffix=".pkl", prefix="skops-test")[1] + with open(pkl_file, "wb") as f: + pickle.dump(model, f) + + with tempfile.TemporaryDirectory(prefix="skops-test") as destination_path: + hub_utils.init( + model=pkl_file, + requirements=[f"scikit-learn=={sklearn.__version__}"], + dst=destination_path, + task="tabular-classification", + data=X, + ) + card = Card(model, metadata=metadata_from_config(destination_path)) + + # add a placeholder for figures + card.add(Plots="") + + # add arbitrary sections, overwrite them, etc. 
+ card.add(hi="howdy") + card.add(**{"parent section/child section": "child content"}) + card.add(**{"foo": "bar", "spam": "eggs"}) + # change content of "hi" section + card.add(**{"hi/german": "guten tag", "hi/french": "salut"}) + card.add(**{"very/deeply/nested/section": "but why?"}) + + # add metrics + card.add_metrics(**{"acc": 0.1}) + + # insert the plot in the "Plot" section we inserted above + plt.plot([4, 5, 6, 7]) + plt.savefig(Path(destination_path) / "fig1.png") + card.add_plot(**{"Plots/A beautiful plot": "fig1.png"}) + + # add table + table = {"split": [1, 2, 3], "score": [4, 5, 6]} + card.add_table( + folded=True, + **{"Model description/Training Procedure/Yet another table": table}, + ) + + # more metrics + card.add_metrics(**{"f1": 0.2, "roc": 123}) + + # add content for "Model description" section, which has subsections but + # otherwise no content + card.add(**{"Model description": "This is a fantastic model"}) + + card.save(Path(destination_path) / "README.md") + print(destination_path) + + # pushing to Hub + token = os.environ["HF_HUB_TOKEN"] + repo_name = f"hf_hub_example-{uuid4()}" + user_name = HfApi().whoami(token=token)["name"] + repo_id = f"{user_name}/{repo_name}" + print(f"Creating and pushing to repo: {repo_id}") + hub_utils.push( + repo_id=repo_id, + source=destination_path, + token=token, + commit_message="testing model cards", + create_remote=True, + private=False, + ) + + +if __name__ == "__main__": + main() From cc229a209bfaf272242a656c9ebe9537db75afd1 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 31 Oct 2022 17:57:48 +0100 Subject: [PATCH 02/26] [WIP] Further align new model card design Added a test that shows that the new card produces the same output as the old card (except for a few non-deterministic parts). This includes most of the idiosyncrasies of the old card we might want to change in the future (e.g. inconsistent capitalization, use of empty lines). 
Some of the more problematic behaviors of the old card class were, however, fixed (e.g. creating an empty metrics table when there are no metrics). The other tests have been reworked to use the new card features to make them more precise. Often, that means that instead of having a very weak test like "assert 'foo' in card.render()", it is now possible to select the exact section and check that it equals the expected output. This work is still unfinished, specifically it still lacks tests for the card repr and for the newly added features. --- skops/card/_card_alternative.py | 174 ++++- skops/card/_model_card.py | 20 +- skops/card/default_template.md | 2 +- skops/card/tests/test_card.py | 2 +- skops/card/tests/test_card_alternative.py | 761 ++++++++++++++++++++++ 5 files changed, 913 insertions(+), 46 deletions(-) create mode 100644 skops/card/tests/test_card_alternative.py diff --git a/skops/card/_card_alternative.py b/skops/card/_card_alternative.py index 656ae18c..8326c10f 100644 --- a/skops/card/_card_alternative.py +++ b/skops/card/_card_alternative.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +import textwrap from dataclasses import dataclass, field from pathlib import Path from reprlib import Repr @@ -17,6 +18,43 @@ aRepr.maxstring = 79 +DEFAULT_TEMPLATE = { + "Model description": "[More Information Needed]", + "Model description/Intended uses & limitations": "[More Information Needed]", + "Model description/Training Procedure/Hyperparameters": """The model is trained with below hyperparameters. + +
+ Click to expand + +{{ hyperparameter_table }} + +
""", + "Model description/Training Procedure/Model Plot": "The model plot is below.", + "Model description/Evaluation Results": """You can find the details about evaluation process and the evaluation results. + + + +[More Information Needed]""", + "How to Get Started with the Model": """Use the code below to get started with the model. + +```python +[More Information Needed] +```""", + "Model Card Authors": """This model card is written by following authors: + +[More Information Needed]""", + "Model Card Contact": """You can contact the model card authors through following channels: +[More Information Needed]""", + "Citation": """Below you can find information related to citation. + +**BibTeX:** +``` +[More Information Needed] +``` +""", +} + + def split_subsection_names(key: str) -> list[str]: return key.split("/") @@ -47,27 +85,24 @@ def format(self) -> str: class Card: - @classmethod - def make_default( - cls, model, model_diagram: bool = True, metadata: CardData | None = None - ): - """Add a bunch of default sections, yet to be implemented""" - raise NotImplementedError - def __init__( - self, model, model_diagram: bool = True, metadata: CardData | None = None + self, + model, + model_diagram: bool = True, + metadata: CardData | None = None, + prefill: bool = True, ): self.model = model self.model_diagram = model_diagram self.metadata = metadata or CardData() self._data: dict[str, Section] = {} - self._metrics: dict[str, float | int] = {} + if prefill: + self._fill_default_sections() + self._metrics: dict[str, str | float | int] = {} self._reset() def _reset(self) -> None: - self._add_model(self.model) - model_file = self.metadata.to_dict().get("model_file") if model_file: self._add_get_started_code(model_file) @@ -75,14 +110,22 @@ def _reset(self) -> None: self._add_model_section() self._add_hyperparams() + def _fill_default_sections(self) -> None: + self.add(**DEFAULT_TEMPLATE) + def add(self, **kwargs: str) -> "Card": for key, val in kwargs.items(): 
self._add_single(key, val) return self - def _add_single(self, key: str, val: Formattable | str) -> None: + def _select( + self, subsection_names: list[str], create: bool = True + ) -> dict[str, Section]: + """TODO""" section = self._data - *subsection_names, leaf_node_name = split_subsection_names(key) + if not subsection_names: + return section + for subsection_name in subsection_names: section_maybe = section.get(subsection_name) @@ -91,10 +134,31 @@ def _add_single(self, key: str, val: Formattable | str) -> None: section = section_maybe.subsections continue - # no subsection, create - entry = Section(title=subsection_name) - section[subsection_name] = entry - section = entry.subsections + if create: + # no subsection, create + entry = Section(title=subsection_name) + section[subsection_name] = entry + section = entry.subsections + else: + raise KeyError(f"Section titles {subsection_name} does not exist") + + return section + + def select(self, key: str | list[str]) -> Section: + assert key # TODO + + if isinstance(key, str): + subsection_names = split_subsection_names(key) + else: + subsection_names = key + + parent_section = self._select(subsection_names[:-1], create=False) + return parent_section[subsection_names[-1]] + + def _add_single(self, key: str, val: Formattable | str) -> None: + section = self._data + *subsection_names, leaf_node_name = split_subsection_names(key) + section = self._select(subsection_names) if leaf_node_name in section: # entry exists, only overwrite content @@ -121,7 +185,11 @@ def _add_model_section(self) -> None: model_plot_div = model_plot_div.replace( "sk-top-container", 'sk-top-container" style="overflow: auto;' ) - self._add_single("Model Plot", model_plot_div) + template = "The model plot is below.\n\n{}" + self._add_single( + "Model description/Training Procedure/Model Plot", + template.format(model_plot_div), + ) def _add_hyperparams(self) -> None: hyperparameter_dict = self.model.get_params(deep=True) @@ -132,12 +200,26 @@ 
def _add_hyperparams(self) -> None: tablefmt="github", ) ) - self._add_single("Model description/Training Procedure/Hyperparameters", table) + template = textwrap.dedent( + """ The model is trained with below hyperparameters. + +
+ Click to expand + + {} + +
""" + ) + self._add_single( + "Model description/Training Procedure/Hyperparameters", + template.format(table), + ) def add_plot(self, folded=False, **kwargs: str) -> "Card": - for plot_name, plot_path in kwargs.items(): + for section_name, plot_path in kwargs.items(): + plot_name = split_subsection_names(section_name)[-1] section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) - self._add_single(plot_name, section) + self._add_single(section_name, section) return self def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card: @@ -146,18 +228,25 @@ def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> C self._add_single(key, section) return self - def add_metrics(self, **kwargs: int | float) -> "Card": + def add_metrics(self, **kwargs: str | int | float) -> "Card": self._metrics.update(kwargs) self._add_metrics(self._metrics) return self - def _add_metrics(self, metrics: dict[str, float | int]) -> None: + def _add_metrics(self, metrics: dict[str, str | float | int]) -> None: table = tabulate( list(metrics.items()), headers=["Metric", "Value"], tablefmt="github", ) - self._add_single("Model description/Evaluation Results", table) + template = textwrap.dedent( + """ You can find the details about evaluation process and the evaluation results. 
+ + + + {}""" + ) + self._add_single("Model description/Evaluation Results", template.format(table)) def _generate_metadata(self, metadata: CardData) -> Iterator[str]: for key, val in metadata.to_dict().items() if metadata else {}: @@ -174,7 +263,9 @@ def _strip_blank(text) -> str: text = re.sub(r"\s+", r" ", text) return text - def _generate_content(self, data, depth: int = 1) -> Iterator[str]: + def _generate_content( + self, data: dict[str, Section], depth: int = 1 + ) -> Iterator[str]: for val in data.values(): title = f"{depth * '#'} {val.title}" yield title @@ -207,28 +298,41 @@ def __repr__(self) -> str: return complete_repr def _add_get_started_code(self, file_name: str, indent: str = " ") -> None: - lines = [ + is_skops_format = file_name.endswith(".skops") # else, assume pickle + + lines = ["```python"] + if is_skops_format: + lines += ["from skops.io import load"] + else: + lines += ["import joblib"] + + lines += [ "import json", "import pandas as pd", ] - if file_name.endswith(".skops"): + if is_skops_format: lines += [ "from skops.io import load", f'model = load("{file_name}")', ] else: # pickle - lines += [ - "import pickle", - f"with open('{file_name}') as f:", - indent + "model = pickle.load(f)", - ] + lines += [f"model = joblib.load({file_name})"] lines += [ 'with open("config.json") as f:', indent + "config = json.load(f)", - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))', + 'model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))', + "```", ] - self._add_single("How to Get Started with the Model", "\n".join(lines)) + template = textwrap.dedent( + """ Use the code below to get started with the model. 
+ + {} + """ + ) + self._add_single( + "How to Get Started with the Model", template.format("\n".join(lines)) + ) def _generate_card(self) -> Iterator[str]: if self.metadata: @@ -261,7 +365,7 @@ def render(self) -> str: Returns ------- - card : str + result : str The rendered model card with all placeholders filled and all extra sections inserted. """ diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index c978e670..22f782e3 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -395,20 +395,20 @@ def _generate_card(self) -> ModelCard: template_sections["get_started_code"] = ( "from skops.io import load\nimport json\n" "import pandas as pd\n" - f'clf = load("{model_file}")\n' + f'model = load("{model_file}")\n' 'with open("config.json") as f:\n ' " config =" " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' + 'model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' ) else: template_sections["get_started_code"] = ( - "import joblib\nimport json\nimport pandas as pd\nclf =" + "import joblib\nimport json\nimport pandas as pd\nmodel =" f' joblib.load({model_file})\nwith open("config.json") as' " f:\n " " config =" " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' + 'model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' ) if self.model_diagram is True: model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model))) @@ -419,11 +419,13 @@ def _generate_card(self) -> ModelCard: model_plot: str | None = model_plot_div else: model_plot = None - template_sections["eval_results"] = tabulate( - list(self._eval_results.items()), - headers=["Metric", "Value"], - tablefmt="github", - ) + + if self._eval_results: # only add metrics if there are any + template_sections["eval_results"] = tabulate( + list(self._eval_results.items()), + headers=["Metric", "Value"], + tablefmt="github", + ) # if template path is 
not given, use default if template_sections.get("template_path") is None: diff --git a/skops/card/default_template.md b/skops/card/default_template.md index edbc8d49..91141dfe 100644 --- a/skops/card/default_template.md +++ b/skops/card/default_template.md @@ -29,7 +29,7 @@ The model plot is below. {{ model_plot }} -## Evaluation Results +## Evaluation Results You can find the details about evaluation process and the evaluation results. diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index e2ed4596..4cb6d4ec 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -222,7 +222,7 @@ def test_code_autogeneration_skops( filename = metadata["model_file"] with open(Path(destination_path) / "README.md") as f: read_buffer = f.read() - assert f'clf = load("{filename}")' in read_buffer + assert f'model = load("{filename}")' in read_buffer # test if the model doesn't overflow the huggingface models page assert read_buffer.count("sk-top-container") == 1 diff --git a/skops/card/tests/test_card_alternative.py b/skops/card/tests/test_card_alternative.py new file mode 100644 index 00000000..b723b3e2 --- /dev/null +++ b/skops/card/tests/test_card_alternative.py @@ -0,0 +1,761 @@ +import copy +import os +import pickle +import tempfile +from itertools import zip_longest +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pytest +import sklearn +from huggingface_hub import CardData, metadata_load +from sklearn.datasets import load_iris +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.tree import DecisionTreeClassifier + +import skops +from skops import hub_utils +from skops.card import metadata_from_config +from skops.card._card_alternative import Card +from skops.card._model_card import PlotSection, TableSection +from skops.io import dump + + +def fit_model(): + X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) + y = np.dot(X, np.array([1, 2])) + 3 + reg = 
LinearRegression().fit(X, y) + return reg + + +@pytest.fixture +def model_card(model_diagram=True): + model = fit_model() + card = Card(model, model_diagram) + yield card + + +@pytest.fixture +def iris_data(): + X, y = load_iris(return_X_y=True, as_frame=True) + yield X, y + + +@pytest.fixture +def iris_estimator(iris_data): + X, y = iris_data + est = LogisticRegression(solver="liblinear").fit(X, y) + yield est + + +@pytest.fixture +def iris_pkl_file(iris_estimator): + pkl_file = tempfile.mkstemp(suffix=".pkl", prefix="skops-test")[1] + with open(pkl_file, "wb") as f: + pickle.dump(iris_estimator, f) + yield pkl_file + + +@pytest.fixture +def iris_skops_file(iris_estimator): + skops_folder = tempfile.mkdtemp() + model_name = "model.skops" + skops_path = Path(skops_folder) / model_name + dump(iris_estimator, skops_path) + yield skops_path + + +def _create_model_card_from_saved_model( + destination_path, + iris_estimator, + iris_data, + save_file, +): + X, y = iris_data + hub_utils.init( + model=save_file, + requirements=[f"scikit-learn=={sklearn.__version__}"], + dst=destination_path, + task="tabular-classification", + data=X, + ) + card = Card(iris_estimator, metadata=metadata_from_config(destination_path)) + card.save(Path(destination_path) / "README.md") + return card + + +@pytest.fixture +def skops_model_card_metadata_from_config( + destination_path, iris_estimator, iris_skops_file, iris_data +): + yield _create_model_card_from_saved_model( + destination_path, iris_estimator, iris_data, iris_skops_file + ) + + +@pytest.fixture +def pkl_model_card_metadata_from_config( + destination_path, iris_estimator, iris_pkl_file, iris_data +): + yield _create_model_card_from_saved_model( + destination_path, iris_estimator, iris_data, iris_pkl_file + ) + + +@pytest.fixture +def destination_path(): + with tempfile.TemporaryDirectory(prefix="skops-test") as dir_path: + yield Path(dir_path) + + +def test_save_model_card(destination_path, model_card): + 
model_card.save(Path(destination_path) / "README.md") + assert (Path(destination_path) / "README.md").exists() + + +def test_select_existing_section(): + # TODO + pass + + +def test_select_non_existing_section_raises(): + # TODO + pass + + +def test_hyperparameter_table(destination_path, model_card): + section_name = "Model description/Training Procedure/Hyperparameters" + text_hyperparams = model_card.select(section_name).content + expected = "\n".join( + [ + "The model is trained with below hyperparameters.", + "", + "
", + " Click to expand ", + "", + "| Hyperparameter | Value |", + "|------------------|------------|", + "| copy_X | True |", + "| fit_intercept | True |", + "| n_jobs | |", + "| normalize | deprecated |", + "| positive | False |", + "", + "
", + ] + ) + assert text_hyperparams == expected + + +def _strip_multiple_chars(text, char): + # _strip_multiple_chars("hi there") == "hi there" + # _strip_multiple_chars("|---|--|", "-") == "|-|-|" + while char + char in text: + text = text.replace(char + char, char) + return text + + +def test_hyperparameter_table_with_line_break(destination_path): + # Hyperparameters can contain values with line breaks, "\n", in them. In + # that case, the markdown table is broken. Check that the hyperparameter + # table we create properly replaces the "\n" with "
". + class EstimatorWithLbInParams: + def get_params(self, deep=False): + return {"fit_intercept": True, "n_jobs": "line\nwith\nbreak"} + + model_card = Card(EstimatorWithLbInParams()) + section_name = "Model description/Training Procedure/Hyperparameters" + text_hyperparams = model_card.select(section_name).content + + # remove multiple whitespaces, as they're not important + text_cleaned = _strip_multiple_chars(text_hyperparams, " ") + assert "| n_jobs | line
with
break |" in text_cleaned + + +def test_plot_model(destination_path, model_card): + text_plot = model_card.select( + "Model description/Training Procedure/Model Plot" + ).content + # don't compare whole text, as it's quite long and non-deterministic + assert text_plot.startswith("The model plot is below.\n\n