-
Notifications
You must be signed in to change notification settings - Fork 61
ENH Refactor Model Card #37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b0049bd
1d34c6f
e8d27e4
c9414b4
6000294
c9331ae
4901ee0
1ba069a
7bd29b1
8236b6c
ac466e3
985a971
8b852c3
df7ad96
456acbd
73dfed6
3eafa1e
ed17c0e
449c6ca
a93816d
8fddf62
2428a10
2bd553e
c384f59
6514e6f
522d5f4
04da3b6
7479cc8
a8789b5
1aa996e
611c7ca
3a0f9a9
b921994
c312a1f
b7f77d9
f379b83
535e698
e9908f3
a2aaaa7
5f4b0cc
27051f4
d6ded6b
129d710
3a0ceec
0403c0f
725ba0f
96453b0
1b9caea
c9d7c3d
16693fa
e0691ff
9e338b5
f15b7d4
68fd0e9
2f2b05f
cb6379a
57ab801
bfc9f4d
adbb9e3
e01143d
3407014
8eaf98b
37858c1
ce57f6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,3 @@ | ||
| from ._model_card import create_model_card | ||
| from ._model_card import Card | ||
|
|
||
| __all__ = ["create_model_card"] | ||
| __all__ = ["Card"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,61 +1,186 @@ | ||
| import os | ||
| import copy | ||
| import re | ||
| import shutil | ||
| import tempfile | ||
| from pathlib import Path | ||
|
|
||
| from modelcards import ModelCard | ||
| from modelcards import CardData, ModelCard | ||
| from sklearn.utils import estimator_html_repr | ||
|
|
||
| import skops | ||
|
|
||
|
|
||
| def _extract_estimator_config(model): | ||
| """Extracts estimator configuration and renders them into a vertical table. | ||
|
|
||
| Parameters | ||
| class Card: | ||
|
adrinjalali marked this conversation as resolved.
|
||
| """Model card class that will be used to generate model card. | ||
| This class can be used to write information and plots to model card and save | ||
| it. This class by default generates an interactive plot of the model and a | ||
| table of hyperparameters. The slots to be filled are defined in the markdown | ||
|
merveenoyan marked this conversation as resolved.
|
||
| template. | ||
| Parameters: | ||
| ---------- | ||
| model (estimator): scikit-learn pipeline or model. | ||
| model: estimator object | ||
| Model that will be documented. | ||
| model_diagram: bool, optional | ||
| Set to True if model diagram should be plotted in the card. | ||
| Notes | ||
| ----- | ||
| You can pass your own custom template using ``add`` method. You can add | ||
| plots to the model card template using ``add_plot``. The key you pass to | ||
| ``add_plot`` will be used for header of the plot. | ||
|
|
||
| Returns | ||
| ------- | ||
| str: | ||
| Markdown table of hyperparameters. | ||
| Examples | ||
| -------- | ||
| >>> from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix | ||
| >>> from sklearn.datasets import load_iris | ||
| >>> from sklearn.linear_model import LogisticRegression | ||
| >>> from skops import card | ||
| >>> X, y = load_iris(return_X_y=True) | ||
| >>> model = LogisticRegression(random_state=0).fit(X, y) | ||
| >>> model_card = card.Card(model) | ||
| >>> model_card.add(license="mit") # doctest: +ELLIPSIS | ||
| <skops.card._model_card.Card object at ...> | ||
| >>> y_pred = model.predict(X) | ||
| >>> cm = confusion_matrix(y, y_pred,labels=model.classes_) | ||
| >>> disp = ConfusionMatrixDisplay(confusion_matrix=cm, | ||
| ... display_labels=model.classes_) | ||
| >>> disp.plot() # doctest: +ELLIPSIS | ||
| <sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay object at ...> | ||
| >>> disp.figure_.savefig("confusion_matrix.png") | ||
| ... | ||
| >>> model_card.add_plot(confusion_matrix="confusion_matrix.png") # doctest: +ELLIPSIS | ||
| <skops.card._model_card.Card object at ...> | ||
| >>> model_card.save((Path("save_dir") / "README.md")) # doctest: +ELLIPSIS | ||
| ... | ||
| """ | ||
| hyperparameter_dict = model.get_params(deep=True) | ||
| table = "| Hyperparameters | Value |\n| :-- | :-- |\n" | ||
| for hyperparameter, value in hyperparameter_dict.items(): | ||
| table += f"| {hyperparameter} | {value} |\n" | ||
| return table | ||
|
|
||
| def __init__(self, model, model_diagram=True): | ||
| self.model = model | ||
| self.hyperparameter_table = self._extract_estimator_config() | ||
| # the spaces in the pipeline breaks markdown, so we replace them | ||
| if model_diagram is True: | ||
| self.model_plot = re.sub(r"\n\s+", "", str(estimator_html_repr(model))) | ||
| else: | ||
| self.model_plot = None | ||
| self.template_sections = {} | ||
| self._figure_paths = {} | ||
|
|
||
| def create_model_card( | ||
| model, | ||
| card_data, | ||
| **card_kwargs, | ||
| ): | ||
| """Creates a model card for the model and saves it to the target directory. | ||
| def add(self, **kwargs): | ||
| """Takes values to fill model card template. | ||
|
merveenoyan marked this conversation as resolved.
|
||
| Parameters: | ||
| ---------- | ||
| **kwargs : dict | ||
| Parameters to be set for the model card. These parameters | ||
| need to be sections of the underlying `jinja` template used. | ||
|
merveenoyan marked this conversation as resolved.
|
||
| Returns: | ||
| -------- | ||
| self : object | ||
| Card object. | ||
| """ | ||
| for section, value in kwargs.items(): | ||
| self.template_sections[section] = value | ||
| return self | ||
|
adrinjalali marked this conversation as resolved.
|
||
|
|
||
| Parameters: | ||
| ---------- | ||
| model: estimator | ||
| scikit-learn compatible estimator. | ||
| card_data: CardData | ||
| CardData object. | ||
| card_kwargs: | ||
| Card kwargs are information you can pass to fill in the sections of the | ||
| card template, e.g. model_description, citation_bibtex, get_started_code. | ||
| """ | ||
| ROOT = skops.__path__ | ||
| model_plot = re.sub(r"\n\s+", "", str(estimator_html_repr(model))) | ||
| hyperparameter_table = _extract_estimator_config(model) | ||
| card_data.library_name = "sklearn" | ||
| template_path = card_kwargs.get("template_path") | ||
| if template_path is None: | ||
| template_path = os.path.join(f"{ROOT[0]}", "card", "default_template.md") | ||
| card_kwargs["template_path"] = template_path | ||
| card = ModelCard.from_template( | ||
| card_data=card_data, | ||
| hyperparameter_table=hyperparameter_table, | ||
| model_plot=model_plot, | ||
| **card_kwargs, | ||
| ) | ||
|
|
||
| return card | ||
| def add_plot(self, **kwargs): | ||
| """Add plots to the model card. | ||
|
|
||
| Parameters: | ||
| ---------- | ||
| **kwargs : dict | ||
| The arguments should be of the form `name=plot_path`, where `name` | ||
| is the name of the plot and `plot_path` is the path to the plot, | ||
| relative to the root of the project. The plots should have already | ||
| been saved under the project's folder. | ||
|
adrinjalali marked this conversation as resolved.
|
||
| Returns: | ||
| -------- | ||
| self : object | ||
| Card object. | ||
| """ | ||
|
merveenoyan marked this conversation as resolved.
|
||
| for plot_name, plot_path in kwargs.items(): | ||
| self._figure_paths[plot_name] = plot_path | ||
| return self | ||
|
|
||
| def save(self, path): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have a few issues with this function, please let me know if you agree:
As a user, I would be surprised to find that the To achieve this, simply declare a copy at the beginning of the method, such as It would be even safer to use If you agree with this change, here is a unit test that would fail without the change but passes with the change: import copy
...
def test_template_sections_not_mutated_by_save(destination_path, model_card):
# TODO
template_sections_before = copy.deepcopy(model_card.template_sections)
model_card.save(Path(destination_path) / "README.md")
template_sections_after = copy.deepcopy(model_card.template_sections)
assert template_sections_before == template_sections_after
I saw that there is some code duplication that we can avoid. It is not strictly necessary to check if self._figure_paths:
with tempfile.TemporaryDirectory() as tmpdirname:
shutil.copyfile(
self.template_sections["template_path"],
f"{tmpdirname}/temporary_template.md",
)
# create a temporary template with the additional plots
self.template_sections[
"template_path"
] = f"{tmpdirname}/temporary_template.md"
# add plots at the end of the template
with open(self.template_sections["template_path"], "a") as template:
for plot in self._figure_paths:
template.write(
f"\n\n{plot}\n"
+ f"\n\n"
)
card = ModelCard.from_template(
card_data=card_data,
hyperparameter_table=self.hyperparameter_table,
model_plot=self.model_plot,
**self.template_sections,
)
else:
card = ModelCard.from_template(
card_data=card_data,
hyperparameter_table=self.hyperparameter_table,
model_plot=self.model_plot,
**self.template_sections,
)can be simplified to: with tempfile.TemporaryDirectory() as tmpdirname:
shutil.copyfile(
template_sections["template_path"],
f"{tmpdirname}/temporary_template.md",
)
# create a temporary template with the additional plots
template_sections["template_path"] = f"{tmpdirname}/temporary_template.md"
# add plots at the end of the template
with open(template_sections["template_path"], "a") as template:
for plot in self._figure_paths:
template.write(
f"\n\n{plot}\n"
+ f"\n\n"
)
card = ModelCard.from_template(
card_data=card_data,
hyperparameter_table=self.hyperparameter_table,
model_plot=self.model_plot,
**template_sections,
)There is, however, a small disadvantage to the proposed change: It will result in the temporary copy of the template file to be made even if there is no plot. IMHO this is a small price to pay for the code simplification but feel free to disagree.
This should probably be done in another PR but I wanted to write it down or else I'll forget: The |
||
| """Save the model card. | ||
|
|
||
| This method renders the model card in mardown format and then saves it | ||
| as the specified file. | ||
|
|
||
| Parameters: | ||
| ---------- | ||
| path: str, or Path | ||
| filepath to save your card. | ||
|
|
||
| Notes | ||
| ----- | ||
| The keys in model card metadata can be seen | ||
| [here](https://huggingface.co/docs/hub/models-cards#model-card-metadata). | ||
| """ | ||
| root = skops.__path__ | ||
|
|
||
| template_sections = copy.deepcopy(self.template_sections) | ||
|
|
||
| metadata_keys = [ | ||
|
merveenoyan marked this conversation as resolved.
|
||
| "language", | ||
| "license", | ||
| "library_name", | ||
| "tags", | ||
| "datasets", | ||
| "model_name", | ||
| "metrics", | ||
| "model-index", | ||
| ] | ||
| card_data_keys = {} | ||
|
|
||
| # if key is supposed to be in metadata and is provided by user, write it to card_data_keys | ||
| for key in template_sections.keys() & metadata_keys: | ||
| card_data_keys[key] = template_sections.pop(key, "") | ||
|
|
||
| # construct CardData | ||
| card_data = CardData(**card_data_keys) | ||
| card_data.library_name = "sklearn" | ||
|
|
||
| # if template path is not given, use default | ||
| if template_sections.get("template_path") is None: | ||
| template_sections["template_path"] = ( | ||
| Path(root[0]) / "card" / "default_template.md" | ||
| ) | ||
|
|
||
| # copying the template so that the original template is not touched/changed | ||
| # append plot_name if any plots are provided, at the end of the template | ||
| with tempfile.TemporaryDirectory() as tmpdirname: | ||
| shutil.copyfile( | ||
|
merveenoyan marked this conversation as resolved.
|
||
| template_sections["template_path"], | ||
| f"{tmpdirname}/temporary_template.md", | ||
| ) | ||
| # create a temporary template with the additional plots | ||
| template_sections["template_path"] = f"{tmpdirname}/temporary_template.md" | ||
| # add plots at the end of the template | ||
| with open(template_sections["template_path"], "a") as template: | ||
| for plot in self._figure_paths: | ||
| template.write( | ||
| f"\n\n{plot}\n" + f"\n\n" | ||
| ) | ||
|
|
||
| card = ModelCard.from_template( | ||
| card_data=card_data, | ||
| hyperparameter_table=self.hyperparameter_table, | ||
| model_plot=self.model_plot, | ||
| **template_sections, | ||
| ) | ||
|
|
||
| card.save(path) | ||
|
|
||
| def _extract_estimator_config(self): | ||
| """Extracts estimator hyperparameters and renders them into a vertical table. | ||
|
|
||
| Returns | ||
| ------- | ||
| str: | ||
| Markdown table of hyperparameters. | ||
| """ | ||
|
|
||
| hyperparameter_dict = self.model.get_params(deep=True) | ||
| table = "| Hyperparameters | Value |\n| :-- | :-- |\n" | ||
| for hyperparameter, value in hyperparameter_dict.items(): | ||
| table += f"| {hyperparameter} | {value} |\n" | ||
| return table | ||
Uh oh!
There was an error while loading. Please reload this page.