Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
b0049bd
refactored card class
merveenoyan Jul 18, 2022
1d34c6f
updated example
merveenoyan Jul 18, 2022
e8d27e4
add function and removed adding sections in save
merveenoyan Jul 19, 2022
c9414b4
added plots & more
merveenoyan Jul 20, 2022
6000294
addressed comments
merveenoyan Jul 20, 2022
c9331ae
Update skops/card/_model_card.py
merveenoyan Jul 20, 2022
4901ee0
added docs and fixed tests
merveenoyan Jul 20, 2022
1ba069a
fix test
merveenoyan Jul 20, 2022
7bd29b1
added temporary plot
merveenoyan Jul 21, 2022
8236b6c
make html for docs
merveenoyan Jul 21, 2022
ac466e3
Update examples/plot_model_card.py
merveenoyan Jul 22, 2022
985a971
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
8b852c3
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
df7ad96
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
456acbd
Update examples/plot_model_card.py
merveenoyan Jul 22, 2022
73dfed6
Update skops/card/tests/test_card.py
merveenoyan Jul 22, 2022
3eafa1e
Update examples/plot_model_card.py
merveenoyan Jul 22, 2022
ed17c0e
Update examples/plot_model_card.py
merveenoyan Jul 22, 2022
449c6ca
Update examples/plot_model_card.py
merveenoyan Jul 22, 2022
a93816d
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
8fddf62
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
2428a10
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
2bd553e
Update skops/card/_model_card.py
merveenoyan Jul 22, 2022
c384f59
addressed comments + cleanup
merveenoyan Jul 22, 2022
6514e6f
added documentation for card class
merveenoyan Jul 22, 2022
522d5f4
Update examples/plot_model_card.py
merveenoyan Jul 25, 2022
04da3b6
Update skops/card/_model_card.py
merveenoyan Jul 25, 2022
7479cc8
Update skops/card/_model_card.py
merveenoyan Jul 25, 2022
a8789b5
Update skops/card/_model_card.py
merveenoyan Jul 25, 2022
1aa996e
Update examples/plot_model_card.py
merveenoyan Jul 25, 2022
611c7ca
replaced Path, added fixture, misc nits
merveenoyan Jul 25, 2022
3a0f9a9
replaced Path, added fixture, misc nits
merveenoyan Jul 25, 2022
b921994
removed unnecessary assert
merveenoyan Jul 25, 2022
c312a1f
added plotting option for diagram and associated test
merveenoyan Jul 25, 2022
b7f77d9
make html
merveenoyan Jul 25, 2022
f379b83
fix docs
merveenoyan Jul 25, 2022
535e698
fix docstring user guide
merveenoyan Jul 25, 2022
e9908f3
fix docstring
merveenoyan Jul 25, 2022
a2aaaa7
fix docstring
merveenoyan Jul 25, 2022
5f4b0cc
fix docstring
merveenoyan Jul 25, 2022
27051f4
fix docstring
merveenoyan Jul 25, 2022
d6ded6b
fix docstring
merveenoyan Jul 25, 2022
129d710
fix docstring
merveenoyan Jul 25, 2022
3a0ceec
fix docstring
merveenoyan Jul 25, 2022
0403c0f
fix docstring
merveenoyan Jul 25, 2022
725ba0f
fix expected nothing error
merveenoyan Jul 25, 2022
96453b0
added ellipsis
merveenoyan Jul 25, 2022
1b9caea
added ellipsis
merveenoyan Jul 25, 2022
c9d7c3d
putting ellipsis inside object
merveenoyan Jul 25, 2022
16693fa
putting ellipsis inside object works lol
merveenoyan Jul 25, 2022
e0691ff
putting ellipsis inside object
merveenoyan Jul 25, 2022
9e338b5
swapped namedtemporaryfile with tempdir for windows tests
merveenoyan Jul 25, 2022
f15b7d4
removed ellipsis at every line
merveenoyan Jul 26, 2022
68fd0e9
brought back ellipsis
merveenoyan Jul 26, 2022
2f2b05f
revert whitespaces for ellipsis
merveenoyan Jul 26, 2022
cb6379a
added model card generation to fixtures and added tests to metadata
merveenoyan Jul 26, 2022
57ab801
added model card generation to fixtures and added tests to metadata
merveenoyan Jul 26, 2022
bfc9f4d
misc nits
merveenoyan Jul 26, 2022
adbb9e3
Update examples/plot_model_card.py
merveenoyan Jul 26, 2022
e01143d
removed extra parantheses
merveenoyan Jul 26, 2022
3407014
Merge branch 'model_card_class' of github.com:merveenoyan/skops into …
merveenoyan Jul 26, 2022
8eaf98b
addressed comments
merveenoyan Jul 27, 2022
37858c1
Update skops/card/_model_card.py
merveenoyan Jul 27, 2022
ce57f6d
Update skops/card/_model_card.py
merveenoyan Jul 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions examples/plot_hf_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
import json
import os
import pickle
from pathlib import Path
from tempfile import mkdtemp, mkstemp
from uuid import uuid4

import sklearn
from huggingface_hub import HfApi
from modelcards import CardData
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
Expand Down Expand Up @@ -86,9 +86,9 @@
# %%
# Model Card
# ==========
card_data = CardData(tags=["tabular-classification"])
Comment thread
adrinjalali marked this conversation as resolved.
model_card = card.create_model_card(model, card_data)
model_card.save(os.path.join(f"{local_repo}", "README.md"))
# We will now create a model card and save it
model_card = card.Card(model)
Comment thread
merveenoyan marked this conversation as resolved.
model_card.save(Path(local_repo) / "README.md")

# %%
# Push to Hub
Expand Down
80 changes: 49 additions & 31 deletions examples/plot_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
# =======
# First we will import everything required for the rest of this document.

import os
import pickle
from pathlib import Path
from tempfile import mkdtemp, mkstemp

import sklearn
from modelcards import CardData
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import HalvingGridSearchCV, train_test_split

from skops import card, hub_utils
Expand Down Expand Up @@ -56,53 +56,71 @@
).fit(X_train, y_train)
model.score(X_test, y_test)


# %%
# Initialize a repository to save our files in
# ============================================
# We will now initialize a repository and save our model
Comment thread
adrinjalali marked this conversation as resolved.
_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")

with open(pkl_name, mode="bw") as f:
pickle.dump(model, file=f)

local_repo = mkdtemp(prefix="skops-")

hub_utils.init(
model=pkl_name, requirements=[f"scikit-learn={sklearn.__version__}"], dst=local_repo
Comment thread
merveenoyan marked this conversation as resolved.
Comment thread
adrinjalali marked this conversation as resolved.
)

# %%
# Create a model card
# ====================
# We now create a model card, set couple of attributes and save it.
# We first set the metadata with CardData and pass it to create_model_card.
# Then, we pass information other than metadata in kwargs.
# We'll initialize a local repository and save the card with the model in it.
# We now create a model card.
# Then, we pass information using ``add()`` and plots using ``add_plot()``.
# We'll then save the card as `README.md`.

model_card = card.Card(model)


# %%
# Pass information and plots to our model card
# ============================================
# We will pass information to fill our model card.
# We will add plots to our card, note that these plots don't necessarily
# have to have a section in our template.
# We will save the plots, and then pass each plot's name and path to ``add_plot``.

license = "mit"
limitations = "This model is not ready to be used in production."
model_description = (
"This is a HistGradientBoostingClassifier model trained on breast cancer dataset."
" It's trained with Halving Grid Search Cross Validation, with parameter grids on"
" max_leaf_nodes and max_depth."
)
license = "mit"

card_data = CardData(
license=license,
tags=["tabular-classification"],
datasets="breast-cancer",
metrics=["acc"],
)

model_card_authors = "skops_user"
get_started_code = (
"import pickle\nwith open(dtc_pkl_filename, 'rb') as file:\nclf = pickle.load(file)"
)
citation = "bibtex\n@inproceedings{...,year={2020}}"

model_card = card.create_model_card(
model,
card_data=card_data,
citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}"
model_card.add(
Comment thread
merveenoyan marked this conversation as resolved.
Comment thread
adrinjalali marked this conversation as resolved.
citation_bibtex=citation_bibtex,
get_started_code=get_started_code,
model_card_authors=model_card_authors,
limitations=limitations,
model_description=model_description,
citation_bibtex=citation,
model_card_authors=model_card_authors,
get_started_code=get_started_code,
)
y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
disp.plot()

_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl")
disp.figure_.savefig(Path(local_repo) / "confusion_matrix.png")

with open(pkl_name, mode="bw") as f:
pickle.dump(model, file=f)
model_card.add_plot(**{"confusion matrix": "confusion_matrix.png"})

local_repo = mkdtemp(prefix="skops-")
hub_utils.init(
model=pkl_name, requirements=[f"scikit-learn={sklearn.__version__}"], dst=local_repo
)
# %%
# Save model card
# ===============
# We can simply save our model card by providing a path to ``save()``

model_card.save(os.path.join(f"{local_repo}", "README.md"))
model_card.save(Path(local_repo) / "README.md")
4 changes: 2 additions & 2 deletions skops/card/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from ._model_card import create_model_card
from ._model_card import Card

__all__ = ["create_model_card"]
__all__ = ["Card"]
221 changes: 173 additions & 48 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,186 @@
import os
import copy
import re
import shutil
import tempfile
from pathlib import Path

from modelcards import ModelCard
from modelcards import CardData, ModelCard
from sklearn.utils import estimator_html_repr

import skops


def _extract_estimator_config(model):
"""Extracts estimator configuration and renders them into a vertical table.

Parameters
class Card:
Comment thread
adrinjalali marked this conversation as resolved.
"""Model card class that will be used to generate model card.
This class can be used to write information and plots to model card and save
it. This class by default generates an interactive plot of the model and a
table of hyperparameters. The slots to be filled are defined in the markdown
Comment thread
merveenoyan marked this conversation as resolved.
template.
Parameters:
----------
model (estimator): scikit-learn pipeline or model.
model: estimator object
Model that will be documented.
model_diagram: bool, optional
Set to True if model diagram should be plotted in the card.
Notes
-----
You can pass your own custom template using ``add`` method. You can add
plots to the model card template using ``add_plot``. The key you pass to
``add_plot`` will be used for header of the plot.

Returns
-------
str:
Markdown table of hyperparameters.
Examples
--------
>>> from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> from skops import card
>>> X, y = load_iris(return_X_y=True)
>>> model = LogisticRegression(random_state=0).fit(X, y)
>>> model_card = card.Card(model)
>>> model_card.add(license="mit") # doctest: +ELLIPSIS
<skops.card._model_card.Card object at ...>
>>> y_pred = model.predict(X)
>>> cm = confusion_matrix(y, y_pred,labels=model.classes_)
>>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,
... display_labels=model.classes_)
>>> disp.plot() # doctest: +ELLIPSIS
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at ...>
>>> disp.figure_.savefig("confusion_matrix.png")
...
>>> model_card.add_plot(confusion_matrix="confusion_matrix.png") # doctest: +ELLIPSIS
<skops.card._model_card.Card object at ...>
>>> model_card.save((Path("save_dir") / "README.md")) # doctest: +ELLIPSIS
...
"""
hyperparameter_dict = model.get_params(deep=True)
table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
for hyperparameter, value in hyperparameter_dict.items():
table += f"| {hyperparameter} | {value} |\n"
return table

def __init__(self, model, model_diagram=True):
self.model = model
self.hyperparameter_table = self._extract_estimator_config()
# the spaces in the pipeline breaks markdown, so we replace them
if model_diagram is True:
self.model_plot = re.sub(r"\n\s+", "", str(estimator_html_repr(model)))
else:
self.model_plot = None
self.template_sections = {}
self._figure_paths = {}

def create_model_card(
model,
card_data,
**card_kwargs,
):
"""Creates a model card for the model and saves it to the target directory.
def add(self, **kwargs):
"""Takes values to fill model card template.
Comment thread
merveenoyan marked this conversation as resolved.
Parameters:
----------
**kwargs : dict
Parameters to be set for the model card. These parameters
need to be sections of the underlying `jinja` template used.
Comment thread
merveenoyan marked this conversation as resolved.
Returns:
--------
self : object
Card object.
"""
for section, value in kwargs.items():
self.template_sections[section] = value
return self
Comment thread
adrinjalali marked this conversation as resolved.

Parameters:
----------
model: estimator
scikit-learn compatible estimator.
card_data: CardData
CardData object.
card_kwargs:
Card kwargs are information you can pass to fill in the sections of the
card template, e.g. model_description, citation_bibtex, get_started_code.
"""
ROOT = skops.__path__
model_plot = re.sub(r"\n\s+", "", str(estimator_html_repr(model)))
hyperparameter_table = _extract_estimator_config(model)
card_data.library_name = "sklearn"
template_path = card_kwargs.get("template_path")
if template_path is None:
template_path = os.path.join(f"{ROOT[0]}", "card", "default_template.md")
card_kwargs["template_path"] = template_path
card = ModelCard.from_template(
card_data=card_data,
hyperparameter_table=hyperparameter_table,
model_plot=model_plot,
**card_kwargs,
)

return card
def add_plot(self, **kwargs):
"""Add plots to the model card.

Parameters:
----------
**kwargs : dict
The arguments should be of the form `name=plot_path`, where `name`
is the name of the plot and `plot_path` is the path to the plot,
relative to the root of the project. The plots should have already
been saved under the project's folder.
Comment thread
adrinjalali marked this conversation as resolved.
Returns:
--------
self : object
Card object.
"""
Comment thread
merveenoyan marked this conversation as resolved.
for plot_name, plot_path in kwargs.items():
self._figure_paths[plot_name] = plot_path
return self

def save(self, path):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a few issues with this function, please let me know if you agree:

  1. It mutates self.template_sections

As a user, I would be surprised to find that the save method actually mutates the object itself. At the moment, that might not cause a bug, but this type of hidden mutation has a tendency to come back to bite you. Therefore, I would avoid it unless there is a good reason for it.

To achieve this, simply declare a copy at the beginning of the method, such as template_sections = self.template_sections.copy(). After that, each time you use self.template_sections in this method, use template_sections instead.

It would be even safer to use copy.deepcopy, but that creates a full duplicate of the sections, and I'm not sure whether that could be too expensive in some circumstances (it doesn't seem so, but I'm not sure).

If you agree with this change, here is a unit test that would fail without the change but passes with the change:

import copy
...
def test_template_sections_not_mutated_by_save(destination_path, model_card):
    # TODO
    template_sections_before = copy.deepcopy(model_card.template_sections)
    model_card.save(Path(destination_path) / "README.md")
    template_sections_after = copy.deepcopy(model_card.template_sections)
    assert template_sections_before == template_sections_after
  2. Remove if self._figure_paths: branch

I saw that there is some code duplication that we can avoid. It is not strictly necessary to check if self._figure_paths and then do an else. This is because the for plot in self._figure_paths loop is already a no-op if self._figure_paths is empty, so the outcome is the same. Thus this whole code:

        if self._figure_paths:
            with tempfile.TemporaryDirectory() as tmpdirname:
                shutil.copyfile(
                    self.template_sections["template_path"],
                    f"{tmpdirname}/temporary_template.md",
                )
                #  create a temporary template with the additional plots
                self.template_sections[
                    "template_path"
                ] = f"{tmpdirname}/temporary_template.md"
                # add plots at the end of the template
                with open(self.template_sections["template_path"], "a") as template:
                    for plot in self._figure_paths:
                        template.write(
                            f"\n\n{plot}\n"
                            + f"![{plot}]({self._figure_paths[plot]})\n\n"
                        )
                card = ModelCard.from_template(
                    card_data=card_data,
                    hyperparameter_table=self.hyperparameter_table,
                    model_plot=self.model_plot,
                    **self.template_sections,
                )

        else:
            card = ModelCard.from_template(
                card_data=card_data,
                hyperparameter_table=self.hyperparameter_table,
                model_plot=self.model_plot,
                **self.template_sections,
            )

can be simplified to:

        with tempfile.TemporaryDirectory() as tmpdirname:
            shutil.copyfile(
                template_sections["template_path"],
                f"{tmpdirname}/temporary_template.md",
            )
            #  create a temporary template with the additional plots
            template_sections["template_path"] = f"{tmpdirname}/temporary_template.md"
            # add plots at the end of the template
            with open(template_sections["template_path"], "a") as template:
                for plot in self._figure_paths:
                    template.write(
                        f"\n\n{plot}\n"
                        + f"![{plot}]({self._figure_paths[plot]})\n\n"
                    )

            card = ModelCard.from_template(
                card_data=card_data,
                hyperparameter_table=self.hyperparameter_table,
                model_plot=self.model_plot,
                **template_sections,
            )

There is, however, a small disadvantage to the proposed change: the temporary copy of the template file will be made even if there is no plot. IMHO this is a small price to pay for the code simplification, but feel free to disagree.

  3. Breaking down the method

This should probably be done in another PR but I wanted to write it down or else I'll forget:

The save method does too many different things at the same time and should be broken down. E.g. there could be one sub-method to get the card_data and another one to get the card.

"""Save the model card.

This method renders the model card in mardown format and then saves it
as the specified file.

Parameters:
----------
path: str, or Path
filepath to save your card.

Notes
-----
The keys in model card metadata can be seen
[here](https://huggingface.co/docs/hub/models-cards#model-card-metadata).
"""
root = skops.__path__

template_sections = copy.deepcopy(self.template_sections)

metadata_keys = [
Comment thread
merveenoyan marked this conversation as resolved.
"language",
"license",
"library_name",
"tags",
"datasets",
"model_name",
"metrics",
"model-index",
]
card_data_keys = {}

# if key is supposed to be in metadata and is provided by user, write it to card_data_keys
for key in template_sections.keys() & metadata_keys:
card_data_keys[key] = template_sections.pop(key, "")

# construct CardData
card_data = CardData(**card_data_keys)
card_data.library_name = "sklearn"

# if template path is not given, use default
if template_sections.get("template_path") is None:
template_sections["template_path"] = (
Path(root[0]) / "card" / "default_template.md"
)

# copying the template so that the original template is not touched/changed
# append plot_name if any plots are provided, at the end of the template
with tempfile.TemporaryDirectory() as tmpdirname:
shutil.copyfile(
Comment thread
merveenoyan marked this conversation as resolved.
template_sections["template_path"],
f"{tmpdirname}/temporary_template.md",
)
# create a temporary template with the additional plots
template_sections["template_path"] = f"{tmpdirname}/temporary_template.md"
# add plots at the end of the template
with open(template_sections["template_path"], "a") as template:
for plot in self._figure_paths:
template.write(
f"\n\n{plot}\n" + f"![{plot}]({self._figure_paths[plot]})\n\n"
)

card = ModelCard.from_template(
card_data=card_data,
hyperparameter_table=self.hyperparameter_table,
model_plot=self.model_plot,
**template_sections,
)

card.save(path)

def _extract_estimator_config(self):
"""Extracts estimator hyperparameters and renders them into a vertical table.

Returns
-------
str:
Markdown table of hyperparameters.
"""

hyperparameter_dict = self.model.get_params(deep=True)
table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
for hyperparameter, value in hyperparameter_dict.items():
table += f"| {hyperparameter} | {value} |\n"
return table
3 changes: 2 additions & 1 deletion skops/card/default_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@ You can contact the model card authors through following channels:
Below you can find information related to citation.

**BibTeX:**

```
{{ citation_bibtex | default("[More Information Needed]", true)}}
```
Loading