diff --git a/examples/plot_hf_hub.py b/examples/plot_hf_hub.py index d1fb4f4c..819e539d 100644 --- a/examples/plot_hf_hub.py +++ b/examples/plot_hf_hub.py @@ -15,12 +15,12 @@ import json import os import pickle +from pathlib import Path from tempfile import mkdtemp, mkstemp from uuid import uuid4 import sklearn from huggingface_hub import HfApi -from modelcards import CardData from sklearn.datasets import load_breast_cancer from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.experimental import enable_halving_search_cv # noqa @@ -86,9 +86,9 @@ # %% # Model Card # ========== -card_data = CardData(tags=["tabular-classification"]) -model_card = card.create_model_card(model, card_data) -model_card.save(os.path.join(f"{local_repo}", "README.md")) +# We will now create a model card and save it +model_card = card.Card(model) +model_card.save(Path(local_repo) / "README.md") # %% # Push to Hub diff --git a/examples/plot_model_card.py b/examples/plot_model_card.py index 5531d80b..1d504f81 100644 --- a/examples/plot_model_card.py +++ b/examples/plot_model_card.py @@ -11,15 +11,15 @@ # ======= # First we will import everything required for the rest of this document. 
-import os import pickle +from pathlib import Path from tempfile import mkdtemp, mkstemp import sklearn -from modelcards import CardData from sklearn.datasets import load_breast_cancer from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.experimental import enable_halving_search_cv # noqa +from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix from sklearn.model_selection import HalvingGridSearchCV, train_test_split from skops import card, hub_utils @@ -56,53 +56,71 @@ ).fit(X_train, y_train) model.score(X_test, y_test) + +# %% +# Initialize a repository to save our files in +# ============================================ +# We will now initialize a repository and save our model +_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl") + +with open(pkl_name, mode="bw") as f: + pickle.dump(model, file=f) + +local_repo = mkdtemp(prefix="skops-") + +hub_utils.init( + model=pkl_name, requirements=[f"scikit-learn={sklearn.__version__}"], dst=local_repo +) + # %% # Create a model card # ==================== -# We now create a model card, set couple of attributes and save it. -# We first set the metadata with CardData and pass it to create_model_card. -# Then, we pass information other than metadata in kwargs. -# We'll initialize a local repository and save the card with the model in it. +# We now create a model card. +# Then, we pass information using ``add()`` and plots using ``add_plot()``. +# We'll then save the card as `README.md`. + +model_card = card.Card(model) + +# %% +# Pass information and plots to our model card +# ============================================ +# We will pass information to fill our model card. +# We will add plots to our card, note that these plots don't necessarily +# have to have a section in our template. +# We will save the plots, and then pass plot name with path to ``add_plot``. + +license = "mit" limitations = "This model is not ready to be used in production." 
model_description = ( "This is a HistGradientBoostingClassifier model trained on breast cancer dataset." " It's trained with Halving Grid Search Cross Validation, with parameter grids on" " max_leaf_nodes and max_depth." ) -license = "mit" - -card_data = CardData( - license=license, - tags=["tabular-classification"], - datasets="breast-cancer", - metrics=["acc"], -) - model_card_authors = "skops_user" get_started_code = ( "import pickle\nwith open(dtc_pkl_filename, 'rb') as file:\nclf = pickle.load(file)" ) -citation = "bibtex\n@inproceedings{...,year={2020}}" - -model_card = card.create_model_card( - model, - card_data=card_data, +citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}" +model_card.add( + citation_bibtex=citation_bibtex, + get_started_code=get_started_code, + model_card_authors=model_card_authors, limitations=limitations, model_description=model_description, - citation_bibtex=citation, - model_card_authors=model_card_authors, - get_started_code=get_started_code, ) +y_pred = model.predict(X_test) +cm = confusion_matrix(y_test, y_pred, labels=model.classes_) +disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_) +disp.plot() -_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl") +disp.figure_.savefig(Path(local_repo) / "confusion_matrix.png") -with open(pkl_name, mode="bw") as f: - pickle.dump(model, file=f) +model_card.add_plot(**{"confusion matrix": "confusion_matrix.png"}) -local_repo = mkdtemp(prefix="skops-") -hub_utils.init( - model=pkl_name, requirements=[f"scikit-learn={sklearn.__version__}"], dst=local_repo -) +# %% +# Save model card +# =============== +# We can simply save our model card by providing a path to ``save()`` -model_card.save(os.path.join(f"{local_repo}", "README.md")) +model_card.save(Path(local_repo) / "README.md") diff --git a/skops/card/__init__.py b/skops/card/__init__.py index df78d1e7..bcdc27a7 100644 --- a/skops/card/__init__.py +++ b/skops/card/__init__.py @@ -1,3 +1,3 @@ -from 
class Card:
    """Model card for a scikit-learn compatible estimator.

    This class collects the information and plots that should go into a model
    card and renders them into a markdown file on ``save``. By default the
    card contains an interactive diagram of the model and a table of its
    hyperparameters. The remaining slots that can be filled are defined by
    the markdown (``jinja``) template.

    Parameters
    ----------
    model : estimator object
        Model that will be documented. It must implement ``get_params``.

    model_diagram : bool, default=True
        Set to True if the model diagram should be rendered in the card.

    Attributes
    ----------
    model : estimator object
        The model passed at construction.

    hyperparameter_table : str
        Markdown table of the model's hyperparameters.

    model_plot : str or None
        HTML representation of the model with line breaks and indentation
        removed, or ``None`` if ``model_diagram`` is false.

    template_sections : dict
        Values registered through ``add``.

    Notes
    -----
    You can use your own template by passing ``template_path`` to ``add``.
    You can add plots to the model card using ``add_plot``. The key you pass
    to ``add_plot`` is written above the plot as its title.

    Examples
    --------
    >>> from pathlib import Path
    >>> from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.linear_model import LogisticRegression
    >>> from skops import card
    >>> X, y = load_iris(return_X_y=True)
    >>> model = LogisticRegression(random_state=0).fit(X, y)
    >>> model_card = card.Card(model)
    >>> model_card.add(license="mit")  # doctest: +ELLIPSIS
    <skops.card._model_card.Card object at ...>
    >>> y_pred = model.predict(X)
    >>> cm = confusion_matrix(y, y_pred, labels=model.classes_)
    >>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,
    ...                               display_labels=model.classes_)
    >>> disp.plot()  # doctest: +ELLIPSIS
    <sklearn.metrics...ConfusionMatrixDisplay object at ...>
    >>> disp.figure_.savefig("confusion_matrix.png")
    >>> model_card.add_plot(
    ...     confusion_matrix="confusion_matrix.png"
    ... )  # doctest: +ELLIPSIS
    <skops.card._model_card.Card object at ...>
    >>> model_card.save(Path("save_dir") / "README.md")
    """

    def __init__(self, model, model_diagram=True):
        self.model = model
        self.hyperparameter_table = self._extract_estimator_config()
        if model_diagram:
            # line breaks followed by indentation inside the HTML repr break
            # the markdown rendering, so they are stripped
            self.model_plot = re.sub(r"\n\s+", "", str(estimator_html_repr(model)))
        else:
            self.model_plot = None
        self.template_sections = {}
        self._figure_paths = {}

    def add(self, **kwargs):
        """Register values used to fill the model card template.

        Parameters
        ----------
        **kwargs : dict
            Values to set for the model card. Keys that are model card
            metadata (e.g. ``license``, ``tags``) are written to the card's
            metadata on ``save``; ``template_path`` selects a custom
            template; all other keys need to be sections of the underlying
            ``jinja`` template. A key passed again overwrites its previous
            value.

        Returns
        -------
        self : object
            Card object.
        """
        self.template_sections.update(kwargs)
        return self

    def add_plot(self, **kwargs):
        """Add plots to the model card.

        Parameters
        ----------
        **kwargs : dict
            The arguments should be of the form ``name=plot_path``, where
            ``name`` is the name of the plot and ``plot_path`` is the path to
            the plot, relative to the root of the project. The plots should
            have already been saved under the project's folder.

        Returns
        -------
        self : object
            Card object.
        """
        self._figure_paths.update(kwargs)
        return self

    def save(self, path):
        """Save the model card.

        This method renders the model card in markdown format and then saves
        it as the specified file.

        Parameters
        ----------
        path : str or Path
            Filepath to save your card.

        Notes
        -----
        The keys in model card metadata can be seen
        [here](https://huggingface.co/docs/hub/models-cards#model-card-metadata).
        ``library_name`` is always set to ``"sklearn"``.
        """
        root = skops.__path__

        # work on a copy so that saving never mutates what the user added
        template_sections = copy.deepcopy(self.template_sections)

        metadata_keys = (
            "language",
            "license",
            "library_name",
            "tags",
            "datasets",
            "model_name",
            "metrics",
            "model-index",
        )
        # metadata provided by the user is moved out of the template sections
        # and handed to CardData instead; iterating the tuple keeps the order
        # deterministic
        card_data_keys = {
            key: template_sections.pop(key)
            for key in metadata_keys
            if key in template_sections
        }

        card_data = CardData(**card_data_keys)
        card_data.library_name = "sklearn"

        # if template path is not given, use the default shipped with skops
        if template_sections.get("template_path") is None:
            template_sections["template_path"] = (
                Path(root[0]) / "card" / "default_template.md"
            )

        # the template is copied so that the original is never modified; the
        # plots, if any, are appended at the end of the copy
        with tempfile.TemporaryDirectory() as tmpdirname:
            temporary_template = Path(tmpdirname) / "temporary_template.md"
            shutil.copyfile(template_sections["template_path"], temporary_template)
            template_sections["template_path"] = str(temporary_template)

            with open(temporary_template, "a") as template:
                for plot_name, plot_path in self._figure_paths.items():
                    template.write(
                        f"\n\n{plot_name}\n![{plot_name}]({plot_path})\n\n"
                    )

            # rendering has to happen while the temporary template exists
            card = ModelCard.from_template(
                card_data=card_data,
                hyperparameter_table=self.hyperparameter_table,
                model_plot=self.model_plot,
                **template_sections,
            )

        card.save(path)

    @staticmethod
    def _escape_table_cell(value):
        """Return ``str(value)`` made safe for a single markdown table cell.

        Pipes would be read as column separators and line breaks would end
        the table row, so pipes are backslash-escaped and line breaks are
        replaced by ``<br />``.
        """
        text = str(value).replace("\r\n", "\n")
        return text.replace("|", "\\|").replace("\n", "<br />")

    def _extract_estimator_config(self):
        """Extract estimator hyperparameters and render them as a table.

        Returns
        -------
        str
            Markdown table of hyperparameters, one row per parameter returned
            by ``self.model.get_params(deep=True)``.
        """
        hyperparameter_dict = self.model.get_params(deep=True)
        header = "| Hyperparameters | Value |\n| :-- | :-- |\n"
        rows = [
            f"| {hyperparameter} | {self._escape_table_cell(value)} |\n"
            for hyperparameter, value in hyperparameter_dict.items()
        ]
        return header + "".join(rows)
"r") as f: - model_card = f.read() - assert "sklearn FTW" in model_card + yield Path(dir_path) -def test_hyperparameter_table(): - with tempfile.TemporaryDirectory(prefix="skops-test") as dir_path: - model_card = generate_card() - model_card.save(os.path.join(f"{dir_path}", "README.md")) - with open(os.path.join(f"{dir_path}", "README.md"), "r") as f: - model_card = f.read() - assert "fit_intercept" in model_card +def test_save_model_card(destination_path, model_card): + model_card.save(Path(destination_path) / "README.md") + assert (Path(destination_path) / "README.md").exists() -def test_plot_model(): - with tempfile.TemporaryDirectory(prefix="skops-test") as dir_path: - model_card = generate_card() - model_card.save(os.path.join(f"{dir_path}", "README.md")) - with open(os.path.join(f"{dir_path}", "README.md"), "r") as f: - model_card = f.read() +def test_hyperparameter_table(destination_path, model_card): + model_card.save(Path(destination_path) / "README.md") + with open(Path(destination_path) / "README.md", "r") as f: + model_card = f.read() + assert "fit_intercept" in model_card + + +def test_plot_model(destination_path, model_card): + model_card.save(Path(destination_path) / "README.md") + with open(Path(destination_path) / "README.md", "r") as f: + model_card = f.read() assert "