diff --git a/docs/changes.rst b/docs/changes.rst
index 7e208380..f1aa7739 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -21,6 +21,11 @@ v0.4
:pr:`242` by `Merve Noyan`_.
- Persistence now supports bytes and bytearrays, added tests to verify that
LightGBM, XGBoost, and CatBoost work now. :pr:`244` by `Benjamin Bossan`_.
+- :class:`.card.Card` now allows adding content to existing sections, using a
+ ``/`` to separate the subsections. E.g. use ``card.add(**{"Existing
+ section/New section": "content"})`` to add "content" to a new subsection
+ called "New section" of an existing section called "Existing section".
+ :pr:`203` by `Benjamin Bossan`_.
v0.3
----
diff --git a/docs/model_card.rst b/docs/model_card.rst
index c1fe0fac..7a6c124d 100644
--- a/docs/model_card.rst
+++ b/docs/model_card.rst
@@ -42,32 +42,30 @@ to touch it yourself.
The markdown part does not necessarily need to follow any specification in
terms of information passed, which gives the user a lot of flexibility. The
-markdown part of the ``README.md`` file is generated from a Jinja template
-with slots that you can inject your content in. ``skops`` has a default
-template which includes the following slots for free text sections:
-
-- ``"model_description"``: A description of the model.
-- ``"limitations"``: Intended use for the model, limitations and potential
- biases. This section should also include risks of using models in certain
- domains if relevant.
-- ``"get_started_code"``: Code the user can run to load and use the model.
-- ``"model_card_authors"``: Authors of the model card. This section includes
- authors of the model card, while ``"citation_bibtex"`` includes citations
- related to the model if relevant.
-- ``"model_card_contact"``: Contact information of people whom can be reached
+markdown part of the ``README.md`` file comes with a couple of defaults provided
+by ``skops``, which include the following slots for free text sections:
+
+- ``"Model description"``: A description of the model.
+- ``"Intended uses & limitations"``: Intended use for the model, limitations and
+ potential biases. This section should also include risks of using models in
+ certain domains if relevant.
+- ``"How to Get Started with the Model"``: Code the user can run to load and use
+ the model.
+- ``"Model Card Authors"``: Authors of the model card. This section includes
+ authors of the model card.
+- ``"Model Card Contact"``: Contact information of people who can be reached
out, in case of questions about the model or the model card.
-- ``"citation_bibtex"``: Bibtex style citations for the model or resources used
- to train the model.
-- ``"eval_methods"``: Details about evaluation process of the model.
-- ``"eval_results"``: Evaluation results that are later parsed as a table by
- :class:`skops.card.Card`.
+- ``"Citation"``: Bibtex style citations for the model or resources used to
+ train the model.
+- ``"Evaluation Results"``: Evaluation results that are later parsed as a table
+ by :class:`skops.card.Card`.
The template also contains the following sections that are automatically
generated by ``skops``.
-- ``"hyperparameter_table"``: Hyperparameters of the model.
-- ``"model_plot"``: A diagram of the model, most relevant in case the model is
+- ``"Hyperparameters"``: Hyperparameters of the model.
+- ``"Model Plot"``: A diagram of the model, most relevant in case the model is
a complex scikit-learn :class:`~sklearn.pipeline.Pipeline`.
Furthermore, it is possible to add plots and tables to the model card. To add
@@ -77,5 +75,31 @@ dictionaries with the key being the header and the values being list of row
entries, or a pandas ``DataFrame``; use the :meth:`.Card.add_table` method for
this.
+To add content to an existing subsection, or create a new subsection, use a
+``"/"`` to indicate the subsection. E.g. let's assume you would like to add a
+subsection called ``"Figures"`` to the existing section ``"Model description"``,
+as well as adding some subsections with plots below that, you can call the
+:meth:`Card.add` method like this:
+
+.. code-block:: python
+
+ card.add(**{"Model description/Figures": "Here are some nice figures"})
+ card.add_plot(**{
+ "Model description/Figures/Confusion Matrix": "path-to-confusion-matrix.png",
+ "Model description/Figures/ROC": "path-to-roc.png",
+ })
+
+Furthermore, you can select existing sections (as well as their subsections)
+using :meth:`Card.select`, and you can delete sections using
+:meth:`Card.delete`:
+
+.. code-block:: python
+
+ section = card.select("Model description/Figures")
+ print(section.content) # 'Here are some nice figures'
+ print(section.subsections)
+ card.delete("Model description/Figures/ROC")
+
+
To see how you can use the API in ``skops`` to create a model card, please
refer to :ref:`sphx_glr_auto_examples_plot_model_card.py`.
diff --git a/examples/plot_model_card.py b/examples/plot_model_card.py
index 4acdfd1b..7a5ff3b3 100644
--- a/examples/plot_model_card.py
+++ b/examples/plot_model_card.py
@@ -29,7 +29,8 @@
)
from sklearn.model_selection import HalvingGridSearchCV, train_test_split
-from skops import card, hub_utils
+from skops import hub_utils
+from skops.card import Card, metadata_from_config
# %%
# Data
@@ -91,7 +92,7 @@
# :func:`.hub_utils.init` above. We will see below how we can populate the model
# card with useful information.
-model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo)))
+model_card = Card(model, metadata=metadata_from_config(Path(local_repo)))
# %%
# Add more information
@@ -103,17 +104,19 @@
model_card.metadata.license = "mit"
limitations = "This model is not ready to be used in production."
model_description = (
- "This is a HistGradientBoostingClassifier model trained on breast cancer dataset."
- " It's trained with Halving Grid Search Cross Validation, with parameter grids on"
- " max_leaf_nodes and max_depth."
+ "This is a `HistGradientBoostingClassifier` model trained on breast cancer "
+ "dataset. It's trained with `HalvingGridSearchCV`, with parameter grids on "
+ "`max_leaf_nodes` and `max_depth`."
)
model_card_authors = "skops_user"
-citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}"
+citation_bibtex = "**BibTeX**\n\n```\n@inproceedings{...,year={2020}}\n```"
model_card.add(
- citation_bibtex=citation_bibtex,
- model_card_authors=model_card_authors,
- limitations=limitations,
- model_description=model_description,
+ **{
+ "Citation": citation_bibtex,
+ "Model Card Authors": model_card_authors,
+ "Model description": model_description,
+ "Model description/Intended uses & limitations": limitations,
+ }
)
# %%
@@ -132,10 +135,10 @@
y_pred = model.predict(X_test)
eval_descr = (
- "The model is evaluated on test data using accuracy and F1-score with macro"
- " average."
+ "The model is evaluated on test data using accuracy and F1-score with "
+ "macro average."
)
-model_card.add(eval_method=eval_descr)
+model_card.add(**{"Model description/Evaluation Results": eval_descr})
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="micro")
@@ -146,7 +149,9 @@
disp.plot()
disp.figure_.savefig(Path(local_repo) / "confusion_matrix.png")
-model_card.add_plot(**{"Confusion matrix": "confusion_matrix.png"})
+model_card.add_plot(
+ **{"Model description/Evaluation Results/Confusion Matrix": "confusion_matrix.png"}
+)
cv_results = model.cv_results_
clf_report = classification_report(
@@ -160,8 +165,8 @@
model_card.add_table(
folded=True,
**{
- "Hyperparameter search results": cv_results,
- "Classification report": clf_report,
+ "Model description/Evaluation Results/Hyperparameter search results": cv_results,
+ "Model description/Evaluation Results/Classification report": clf_report,
},
)
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index 6073e863..2adafa62 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -1,22 +1,21 @@
from __future__ import annotations
-import copy
import json
import re
-import shutil
-import tempfile
+import textwrap
import zipfile
-from dataclasses import dataclass
+from collections.abc import Mapping
+from dataclasses import dataclass, field
from pathlib import Path
from reprlib import Repr
-from typing import Any, Optional, Union
+from typing import Any, Iterator, Literal, Protocol, Sequence, Union
import joblib
-from huggingface_hub import ModelCard, ModelCardData
+from huggingface_hub import ModelCardData
from sklearn.utils import estimator_html_repr
from tabulate import tabulate # type: ignore
-import skops
+from skops.card._templates import CONTENT_PLACEHOLDER, SKOPS_TEMPLATE, Templates
from skops.io import load
# Repr attributes can be used to control the behavior of repr
@@ -25,6 +24,13 @@
aRepr.maxstring = 79
+VALID_TEMPLATES = {item.value for item in Templates}
+NEED_SECTION_ERR_MSG = (
+ "You are trying to {action} but you're using a custom template, please pass the "
+ "'section' argument to determine where to put the content"
+)
+
+
def wrap_as_details(text: str, folded: bool) -> str:
if not folded:
return text
@@ -35,7 +41,7 @@ def _clean_table(table: str) -> str:
# replace line breaks "\n" with html tag <br />, however, leave end-of-line
# line breaks (eol_lb) intact
eol_lb = "|\n"
- placeholder = "$%!?" # arbitrary sting that never appears naturally
+ placeholder = "$%!?" # arbitrary string that never appears naturally
table = (
table.replace(eol_lb, placeholder)
.replace("\n", "<br />")
@@ -64,7 +70,7 @@ def __repr__(self) -> str:
class TableSection:
"""Adds a table to the model card"""
- table: dict[str, list[Any]]
+ table: Mapping[str, Sequence[Any]]
folded: bool = False
def __post_init__(self) -> None:
@@ -76,17 +82,11 @@ def __post_init__(self) -> None:
self._is_pandas_df = False
if self._is_pandas_df:
- if self.table.empty: # type: ignore
- raise ValueError("Empty table added")
+ ncols = len(self.table.columns) # type: ignore
else:
ncols = len(self.table)
- if ncols == 0:
- raise ValueError("Empty table added")
-
- key = next(iter(self.table.keys()))
- nrows = len(self.table[key])
- if nrows == 0:
- raise ValueError("Empty table added")
+ if ncols == 0:
+ raise ValueError("Trying to add table with no columns")
def format(self) -> str:
if self._is_pandas_df:
@@ -165,6 +165,129 @@ def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData:
return card_data
+def split_subsection_names(key: str) -> list[str]:
+ r"""Split a string containing multiple sections into a list of strings for
+ each.
+
+ The separator is ``"/"``. To avoid splitting on ``"/"``, escape it using
+ ``"\\/"``.
+
+ Examples
+ --------
+ >>> split_subsection_names("Section A")
+ ['Section A']
+ >>> split_subsection_names("Section A/Section B/Section C")
+ ['Section A', 'Section B', 'Section C']
+ >>> split_subsection_names("A section containing \\/ a slash")
+ ['A section containing / a slash']
+ >>> split_subsection_names("Spaces are / stripped")
+ ['Spaces are', 'stripped']
+
+ Parameters
+ ----------
+ key : str
+ The section name consisting potentially of multiple subsections. It has
+ to be ensured beforehand that this is not an empty string.
+
+ Returns
+ -------
+ parts : list of str
+ The individual (sub)sections.
+
+ """
+ placeholder = "$%!?" # arbitrary string that never appears naturally
+ key = key.replace("\\/", placeholder)
+ parts = (part.strip() for part in key.split("/"))
+ return [part.replace(placeholder, "/") for part in parts]
+
+
+def _getting_started_code(
+ file_name: str, model_format: Literal["pickle", "skops"], indent=" "
+) -> list[str]:
+ # get lines of code required to load the model
+ lines = [
+ "import json",
+ "import pandas as pd",
+ ]
+ if model_format == "skops":
+ lines += ["import skops.io as sio"]
+ else:
+ lines += ["import joblib"]
+
+ if model_format == "skops":
+ lines += [f'model = sio.load("{file_name}")']
+ else: # pickle
+ lines += [f'model = joblib.load("{file_name}")']
+
+ lines += [
+ 'with open("config.json") as f:',
+ indent + "config = json.load(f)",
+ 'model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))',
+ ]
+ return lines
+
+
+@dataclass
+class Section:
+ """Building block of the model card.
+
+ The model card is represented internally as a dict with keys being strings
+ and values being Sections. The key is identical to the section title.
+
+ Additionally, the section may hold content in the form of strings (can be an
+ empty string) or a ``Formattable``, which is simply an object with a
+ ``format`` method that returns a string.
+
+ Finally, the section can contain subsections, which again are dicts of
+ string keys and section values (the dict can be empty). Therefore, the model
+ card representation forms a tree structure, making use of the fact that dict
+ order is preserved.
+
+ """
+
+ title: str
+ content: Formattable | str
+ subsections: dict[str, Section] = field(default_factory=dict)
+
+ def select(self, key: str) -> Section:
+ """Return a subsection or subsubsection of this section
+
+ Parameters
+ ----------
+ key : str
+ The name of the (sub)section to select. When selecting a subsection,
+ either use a ``"/"`` in the name to separate the parent and child
+ sections, or chain multiple ``select`` calls.
+
+ Returns
+ -------
+ section : Section
+ A dataclass containing all information relevant to the selected
+ section. Those are the title, the content, and subsections (in a
+ dict).
+
+ Raises
+ ------
+ KeyError
+ If the given section name was not found, a ``KeyError`` is raised.
+ """
+ section_names = split_subsection_names(key)
+ # check that no section name is empty
+ if not all(bool(name) for name in section_names):
+ msg = f"Section name cannot be empty but got '{key}'"
+ raise KeyError(msg)
+
+ section = self
+ for section_name in section_names:
+ section = section.subsections[section_name]
+ return section
+
+
+class Formattable(Protocol):
+ def format(self) -> str:
+ ... # pragma: no cover
+
+
def _load_model(model: Any, trusted=False) -> Any:
"""Return a model instance.
@@ -210,8 +333,7 @@ class Card:
This class can be used to write information and plots to model card and save
it. This class by default generates an interactive plot of the model and a
- table of hyperparameters. The slots to be filled are defined in the markdown
- template.
+ table of hyperparameters. Some sections are added by default.
Parameters
----------
@@ -232,6 +354,17 @@ class Card:
of the ``config.json`` file, which itself is created by
:func:`skops.hub_utils.init`.
+ template: "skops", dict, or None (default="skops")
+ Whether to add default sections or not. The template can be a predefined
+ template, which at the moment can only be the string ``"skops"``, which
+ is a template provided by ``skops`` that is geared towards typical
+ sklearn models. If you don't want any prefilled sections, just pass
+ ``None``. If you want custom prefilled sections, pass a ``dict``, where
+ keys are the sections and values are the contents of the sections. Note
+ that when you use no template or a custom template, some methods will
+ not work, e.g. :meth:`Card.add_metrics`, since it's not clear where to
+ put the metrics when there is no template or a custom template.
+
trusted: bool, default=False
Passed to :func:`skops.io.load` if the model is a file path and it's
a `skops` file.
@@ -245,13 +378,6 @@ class Card:
Metadata to be stored at the beginning of the saved model card, as
metadata to be understood by the Hugging Face Hub.
- Notes
- -----
- The contents of the sections of the template can be set using
- :meth:`Card.add` method. Plots can be added to the model card using
- :meth:`Card.add_plot`. The key you pass to :meth:`Card.add_plot` will be
- used as the header of the plot.
-
Examples
--------
>>> from sklearn.metrics import (
@@ -264,20 +390,17 @@ class Card:
>>> from pathlib import Path
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
- >>> from skops import card
+ >>> from skops.card import Card
>>> X, y = load_iris(return_X_y=True)
>>> model = LogisticRegression(solver="liblinear", random_state=0).fit(X, y)
- >>> model_card = card.Card(model)
+ >>> model_card = Card(model)
>>> model_card.metadata.license = "mit"
>>> y_pred = model.predict(X)
>>> model_card.add_metrics(**{
... "accuracy": accuracy_score(y, y_pred),
... "f1 score": f1_score(y, y_pred, average="micro"),
... })
- Card(
- model=LogisticRegression(random_state=0, solver='liblinear'),
- metadata.license=mit,
- )
+ Card(...)
>>> cm = confusion_matrix(y, y_pred,labels=model.classes_)
>>> disp = ConfusionMatrixDisplay(
... confusion_matrix=cm,
@@ -288,64 +411,547 @@ class Card:
>>> tmp_path = Path(tempfile.mkdtemp(prefix="skops-"))
>>> disp.figure_.savefig(tmp_path / "confusion_matrix.png")
...
- >>> model_card.add_plot(confusion_matrix="confusion_matrix.png")
+ >>> model_card.add_plot(**{
+ ... "Model description/Confusion Matrix": tmp_path / "confusion_matrix.png"
+ ... })
+ Card(...)
+ >>> # add new content to the existing section "Model description"
+ >>> model_card.add(**{"Model description": "This is the best model"})
+ Card(...)
+ >>> # add content to a new section
+ >>> model_card.add(**{"A new section": "Please rate my model"})
+ Card(...)
+ >>> # add new subsection to an existing section by using "/"
+ >>> model_card.add(**{"Model description/Model name": "This model is called Bob"})
Card(
model=LogisticRegression(random_state=0, solver='liblinear'),
metadata.license=mit,
- confusion_matrix='...confusion_matrix.png',
+ Model description=This is the best model,
+ Model description/Training Procedure/... | | warm_start | False | ,
+ Model description/Training Procedure/...,
+ Model description/Evaluation Results=...ccuracy | 0.96 | | f1 score | 0.96 |,
+ Model description/Confusion Matrix=...confusion_matrix.png'),
+ Model description/Model name=This model is called Bob,
+ A new section=Please rate my model,
)
+ >>> # save the card to a README.md file
>>> model_card.save(tmp_path / "README.md")
"""
def __init__(
self,
- model: Any,
+ model,
model_diagram: bool = True,
- metadata: Optional[ModelCardData] = None,
+ metadata: ModelCardData | None = None,
+ template: Literal["skops"] | dict[str, str] | None = "skops",
trusted: bool = False,
) -> None:
self.model = model
self.model_diagram = model_diagram
- self._eval_results = {} # type: ignore
- self._template_sections: dict[str, str] = {}
- self._extra_sections: list[tuple[str, Any]] = []
self.metadata = metadata or ModelCardData()
+ self.template = template
self.trusted = trusted
+ self._data: dict[str, Section] = {}
+ self._metrics: dict[str, str | float | int] = {}
+
+ self._populate_template()
+
+ def _populate_template(self):
+ """If initialized with a template, use it to populate the card."""
+ if not self.template:
+ return
+
+ if isinstance(self.template, str) and (self.template not in VALID_TEMPLATES):
+ valid_templates = ", ".join(f"'{val}'" for val in sorted(VALID_TEMPLATES))
+ msg = (
+ f"Unknown template '{self.template}', "
+ f"template must be one of the following values: {valid_templates}"
+ )
+ raise ValueError(msg)
+
+ if self.template == Templates.skops.value:
+ self.add(**SKOPS_TEMPLATE)
+ # for the skops template, automatically add some default sections
+ self.add_model_plot()
+ self.add_hyperparams()
+ self.add_get_started_code()
+ elif isinstance(self.template, Mapping):
+ self.add(**self.template)
+
def get_model(self) -> Any:
- """Returns sklearn estimator object if ``Path``/``str``
- is provided.
+ """Returns sklearn estimator object.
+
+ If the ``model`` is already loaded, return it as is. If the ``model``
+ attribute is a ``Path``/``str``, load the model and return it.
Returns
-------
- model : Object
- Model instance.
+ model : BaseEstimator
+ The model instance.
+
"""
model = _load_model(self.model, self.trusted)
+ # Ideally, we would only call the method below if we *know* that the
+ # model has changed, but at the moment we have no way of knowing that
return model
- def add(self, **kwargs: str) -> "Card":
- """Takes values to fill model card template.
+ def add(self, **kwargs: str | Formattable) -> Card:
+ """Add new section(s) to the model card.
+
+ Add one or multiple sections to the model card. The section names are
+ taken from the keys and the contents are taken from the values.
+
+ To add to an existing section, use a ``"/"`` in the section name, e.g.:
+
+ ``card.add(**{"Existing section/New section": "content"})``.
+
+ If the parent section does not exist, it will be added automatically.
+
+ To add a section with ``"/"`` in its title (i.e. not intended as a
+ subsection), escape the slash like so, ``"\\/"``, e.g.:
+
+ ``card.add(**{"A section with\\/a slash in the title": "content"})``.
+
+ If a section of the given name already exists, its content will be
+ overwritten.
Parameters
----------
**kwargs : dict
- Parameters to be set for the model card. These parameters
- need to be sections of the underlying `jinja` template used.
+ The keys of the dictionary serve as the section title and the values
+ as the section content. It's possible to add to existing sections.
+
+ Returns
+ -------
+ self : object
+ Card object.
+
+ """
+ for key, val in kwargs.items():
+ self._add_single(key, val)
+ return self
+
+ def _select(
+ self, subsection_names: Sequence[str], create: bool = True
+ ) -> dict[str, Section]:
+ """Select a single section from the data.
+
+ Parameters
+ ----------
+ subsection_names: list of str
+ The subsection names, already split into individual subsections.
+
+ create: bool (default=True)
+ Whether to create the subsection if it does not already exist or
+ not.
+
+ Returns
+ -------
+ section: dict of Section
+ A dict mapping the section key (identical to the title) to the
+ actual ``Section``, which is a dataclass that contains the actual
+ data of the section.
+
+ Raises
+ ------
+ KeyError
+ If the section does not exist and ``create=False``, raises a
+ ``KeyError``.
+
+ """
+ section = self._data
+ if not subsection_names:
+ return section
+
+ for subsection_name in subsection_names:
+ section_maybe = section.get(subsection_name)
+
+ # there are already subsections
+ if section_maybe is not None:
+ section = section_maybe.subsections
+ continue
+
+ if create:
+ # no subsection, create
+ entry = Section(title=subsection_name, content="")
+ section[subsection_name] = entry
+ section = entry.subsections
+ else:
+ raise KeyError(f"Section {subsection_name} does not exist")
+
+ return section
+
+ def select(self, key: str) -> Section:
+ """Select a section from the model card.
+
+ To select a subsection of an existing section, use a ``"/"`` in the
+ section name, e.g.:
+
+ ``card.select("Main section/Subsection")``.
+
+ Alternatively, multiple ``select`` calls can be chained:
+
+ ``card.select("Main section").select("Subsection")``.
+
+ Parameters
+ ----------
+ key : str
+ The name of the (sub)section to select. When selecting a subsection,
+ either use a ``"/"`` in the name to separate the parent and child
+ sections, or chain multiple ``select`` calls.
+
+ Returns
+ -------
+ self : Section
+ A dataclass containing all information relevant to the selected
+ section. Those are the title, the content, and subsections (in a
+ dict).
+
+ Raises
+ ------
+ KeyError
+ If the given section name was not found, a ``KeyError`` is raised.
+
+ """
+ if not key:
+ msg = f"Section name cannot be empty but got '{key}'"
+ raise KeyError(msg)
+
+ *subsection_names, leaf_node_name = split_subsection_names(key)
+
+ if not leaf_node_name:
+ msg = f"Section name cannot be empty but got '{key}'"
+ raise KeyError(msg)
+
+ parent_section = self._select(subsection_names, create=False)
+ return parent_section[leaf_node_name]
+
+ def delete(self, key: str | Sequence[str]) -> None:
+ """Delete a section from the model card.
+
+ To delete a subsection of an existing section, use a ``"/"`` in the
+ section name, e.g.:
+
+ ``card.delete("Existing section/New section")``.
+
+ Alternatively, a list of strings can be passed:
+
+ ``card.delete(["Existing section", "New section"])``.
+
+ Parameters
+ ----------
+ key : str or list of str
+ The name of the (sub)section to delete. When deleting a subsection,
+ either use a ``"/"`` in the name to separate the parent and child
+ sections, or pass a list of strings.
+
+ Raises
+ ------
+ KeyError
+ If the given section name was not found, a ``KeyError`` is raised.
+
+ """
+ if not key:
+ msg = f"Section name cannot be empty but got '{key}'"
+ raise KeyError(msg)
+
+ if isinstance(key, str):
+ *subsection_names, leaf_node_name = split_subsection_names(key)
+ else:
+ *subsection_names, leaf_node_name = key
+
+ if not leaf_node_name:
+ msg = f"Section name cannot be empty but got '{key}'"
+ raise KeyError(msg)
+
+ parent_section = self._select(subsection_names, create=False)
+ del parent_section[leaf_node_name]
+
+ def _add_single(self, key: str, val: Formattable | str) -> Section:
+ """Add a single section.
+
+ If the (sub)section does not exist, it is created. Otherwise, the
+ existing (sub)section is modified.
+
+ Parameters
+ ----------
+ key: str
+ The name of the (sub)section.
+
+ val: str or Formattable
+ The value to assign to the (sub)section.
+
+ """
+ *subsection_names, leaf_node_name = split_subsection_names(key)
+ section = self._select(subsection_names)
+
+ if leaf_node_name in section:
+ # entry exists, only overwrite content
+ section[leaf_node_name].content = val
+ else:
+ # entry does not exist, create a new one
+ section[leaf_node_name] = Section(title=leaf_node_name, content=val)
+
+ return section[leaf_node_name]
+
+ def add_model_plot(
+ self,
+ section: str | None = None,
+ description: str | None = None,
+ ) -> Card:
+ """Add a model plot
+
+ Use sklearn model visualization to create a diagram of the model.
+ See the `sklearn model visualization docs
+ <https://scikit-learn.org/stable/modules/compose.html#visualizing-composite-estimators>`_.
+
+ The model diagram is not added if the card class was instantiated with
+ ``model_diagram=False``.
+
+ Parameters
+ ----------
+ section : str or None, default=None
+ The section that the model plot should be added to. If you're using
+ the default skops template, you can leave this parameter as
+ ``None``, otherwise you have to indicate the section. If the section
+ does not exist, it will be created for you.
+
+ description : str or None, default=None
+ An optional description to be added before the model plot. If you're
+ using the default skops template, a standard text is used. Pass a
+ string here if you want to use your own text instead. Leave this
+ empty to not add any description.
+
+ Returns
+ -------
+ self : object
+ Card object.
+ """
+ if not self.model_diagram:
+ return self
+
+ if section is None:
+ if self.template == Templates.skops.value:
+ section = "Model description/Training Procedure/Model Plot"
+ else:
+ msg = NEED_SECTION_ERR_MSG.format(action="add a model plot")
+ raise ValueError(msg)
+
+ if description is None:
+ if self.template == Templates.skops.value:
+ description = "The model plot is below."
+
+ self._add_model_plot(self.get_model(), section=section, description=description)
+
+ return self
+
+ def _add_model_plot(
+ self, model: Any, section: str, description: str | None
+ ) -> None:
+ """Add model plot section
+
+ The model should be a loaded sklearn model, not a path.
+
+ """
+ model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(model)))
+ if model_plot_div.count("sk-top-container") == 1:
+ model_plot_div = model_plot_div.replace(
+ "sk-top-container", 'sk-top-container" style="overflow: auto;'
+ )
+
+ if description:
+ content = f"{description}\n\n{model_plot_div}"
+ else:
+ content = model_plot_div
+
+ self._add_single(section, content)
+
+ def add_hyperparams(
+ self, section: str | None = None, description: str | None = None
+ ) -> Card:
+ """Add the model's hyperparameters as a table
+
+ Parameters
+ ----------
+ section : str or None, default=None
+ The section that the hyperparameters should be added to. If you're
+ using the default skops template, you can leave this parameter as
+ ``None``, otherwise you have to indicate the section. If the section
+ does not exist, it will be created for you.
+
+ description : str or None, default=None
+ An optional description to be added before the hyperparameters. If
+ you're using the default skops template, a standard text is used.
+ Pass a string here if you want to use your own text instead. Leave
+ this empty to not add any description.
+
+ Returns
+ -------
+ self : object
+ Card object.
+
+ """
+ if section is None:
+ if self.template == Templates.skops.value:
+ section = "Model description/Training Procedure/Hyperparameters"
+ else:
+ msg = NEED_SECTION_ERR_MSG.format(action="add model hyperparameters")
+ raise ValueError(msg)
+
+ if description is None:
+ if self.template == Templates.skops.value:
+ description = "The model is trained with below hyperparameters."
+
+ self._add_hyperparams(
+ self.get_model(), section=section, description=description
+ )
+ return self
+
+ def _add_hyperparams(
+ self, model: Any, section: str, description: str | None
+ ) -> None:
+ """Add hyperparameter section.
+
+ The model should be a loaded sklearn model, not a path.
+
+ """
+ hyperparameter_dict = model.get_params(deep=True)
+ table = _clean_table(
+ tabulate(
+ list(hyperparameter_dict.items()),
+ headers=["Hyperparameter", "Value"],
+ tablefmt="github",
+ )
+ )
+ table_folded = textwrap.dedent(
+ """
+
+ Click to expand
+
+ {}
+
+ """
+ ).format(table)
+
+ if description:
+ content = f"{description}\n{table_folded}"
+ else:
+ content = table_folded
+
+ self._add_single(section, content)
+
+ def add_get_started_code(
+ self,
+ section: str | None = None,
+ description: str | None = None,
+ file_name: str | None = None,
+ model_format: Literal["pickle", "skops"] | None = None,
+ ) -> Card:
+ """Add getting started code
+
+ This code can be copied by users to load the model and make predictions
+ with it.
+
+ Parameters
+ ----------
+ section : str or None, default=None
+ The section that the code should be added to. If you're using the
+ default skops template, you can leave this parameter as ``None``,
+ otherwise you have to indicate the section. If the section does not
+ exist, it will be created for you.
+
+ description : str or None, default=None
+ An optional description to be added before the code. If you're using
+ the default skops template, a standard text is used. Pass a string
+ here if you want to use your own text instead. Leave this empty to
+ not add any description.
+
+ file_name : str or None, default=None
+ The file name of the model. If no file name is indicated, there will
+ be an attempt to read the file name from the card's metadata. If
+ that fails, an error is raised and you have to pass this argument
+ explicitly.
+
+ model_format : "skops", "pickle", or None, default=None
+ The model format used to store the model. If no format is indicated,
+ there will be an attempt to read the model format from the card's
+ metadata. If that fails, an error is raised and you have to pass
+ this argument explicitly.
Returns
-------
self : object
Card object.
+
"""
- for section, value in kwargs.items():
- self._template_sections[section] = value
+ if file_name is None:
+ file_name = self.metadata.to_dict().get("model_file")
+
+ if model_format is None:
+ model_format = (
+ self.metadata.to_dict().get("sklearn", {}).get("model_format")
+ )
+
+ if model_format and (model_format not in ("pickle", "skops")):
+ msg = (
+ f"Invalid model format '{model_format}', should be one of "
+ "'pickle' or 'skops'"
+ )
+ raise ValueError(msg)
+
+ if (not file_name) or (not model_format):
+ return self
+
+ if section is None:
+ if self.template == Templates.skops.value:
+ section = "How to Get Started with the Model"
+ else:
+ msg = NEED_SECTION_ERR_MSG.format(action="add get started code")
+ raise ValueError(msg)
+
+ if description is None:
+ if self.template == Templates.skops.value:
+ description = "Use the code below to get started with the model."
+
+ self._add_get_started_code(
+ section,
+ file_name=file_name,
+ model_format=model_format,
+ description=description,
+ )
+
return self
- def add_plot(self, folded=False, **kwargs: str) -> "Card":
+ def _add_get_started_code(
+ self,
+ section: str,
+ file_name: str,
+ model_format: Literal["pickle", "skops"],
+ description: str | None,
+ indent: str = " ",
+ ) -> None:
+ """Add getting started code to the corresponding section"""
+ lines = _getting_started_code(
+ file_name, model_format=model_format, indent=indent
+ )
+ lines = ["```python"] + lines + ["```"]
+ code = "\n".join(lines)
+
+ if description:
+ content = f"{description}\n\n{code}"
+ else:
+ content = code
+
+ self._add_single(section, content)
+
+ def add_plot(self, *, folded=False, **kwargs: str) -> Card:
"""Add plots to the model card.
+ The plot should be saved on the file system and the path passed as
+ value.
+
Parameters
----------
folded: bool (default=False)
@@ -355,22 +961,27 @@ def add_plot(self, folded=False, **kwargs: str) -> "Card":
large.
**kwargs : dict
- The arguments should be of the form `name=plot_path`, where `name`
- is the name of the plot and `plot_path` is the path to the plot,
- relative to the root of the project. The plots should have already
- been saved under the project's folder.
+ The arguments should be of the form ``name=plot_path``, where
+ ``name`` is the name of the plot and section, and ``plot_path`` is
+ the path to the plot on the file system, relative to the root of the
+ project. The plots should have already been saved under the
+ project's folder.
Returns
-------
self : object
Card object.
+
"""
- for plot_name, plot_path in kwargs.items():
+ for section_name, plot_path in kwargs.items():
+ plot_name = split_subsection_names(section_name)[-1]
section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded)
- self._extra_sections.append((plot_name, section))
+ self._add_single(section_name, section)
return self
- def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card:
+ def add_table(
+ self, *, folded: bool = False, **kwargs: dict["str", list[Any]]
+ ) -> Card:
"""Add a table to the model card.
Add a table to the model card. This can be especially useful when you
@@ -417,212 +1028,213 @@ def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> C
"""
for key, val in kwargs.items():
section = TableSection(table=val, folded=folded)
- self._extra_sections.append((key, section))
+ self._add_single(key, section)
return self
- def add_metrics(self, **kwargs: str) -> "Card":
+ def add_metrics(
+ self,
+ section: str | None = None,
+ description: str | None = None,
+ **kwargs: str | int | float,
+ ) -> Card:
"""Add metric values to the model card.
+ All metrics will be collected in, and then formatted to, a table.
+
Parameters
----------
+ section : str or None, default=None
+ The section that the metrics should be added to. If you're using the
+ default skops template, you can leave this parameter as ``None``,
+ otherwise you have to indicate the section. If the section does not
+ exist, it will be created for you.
+
+ description : str or None, default=None
+ An optional description to be added before the metrics. If you're
+ using the default skops template, a standard text is used. Pass a
+ string here if you want to use your own text instead. Leave this
+ empty to not add any description.
+
**kwargs : dict
- A dictionary of the form `{metric name: metric value}`.
+ A dictionary of the form ``{metric name: metric value}``.
Returns
-------
self : object
Card object.
"""
- for metric, value in kwargs.items():
- self._eval_results[metric] = value
- return self
+ if section is None:
+ if self.template == Templates.skops.value:
+ section = "Model description/Evaluation Results"
+ else:
+ msg = NEED_SECTION_ERR_MSG.format(action="add metrics")
+ raise ValueError(msg)
+
+ if description is None:
+ if self.template == Templates.skops.value:
+ description = (
+ "You can find the details about evaluation process and "
+ "the evaluation results."
+ )
- def _generate_card(self) -> ModelCard:
- """Generate the ModelCard object
+ self._metrics.update(kwargs)
+ self._add_metrics(section, self._metrics, description=description)
+ return self
- Returns
- -------
- card : huggingface_hub.ModelCard
- The final :class:`huggingface_hub.ModelCard` object with all
- placeholders filled and all extra sections inserted.
- """
- root = skops.__path__
-
- # add evaluation results
-
- template_sections = copy.deepcopy(self._template_sections)
-
- if self.metadata:
- model_file = self.metadata.to_dict().get("model_file")
- if model_file and model_file.endswith(".skops"):
- template_sections["get_started_code"] = (
- "from skops.io import load\nimport json\n"
- "import pandas as pd\n"
- f'clf = load("{model_file}")\n'
- 'with open("config.json") as f:\n '
- " config ="
- " json.load(f)\n"
- 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
- )
- elif model_file is not None:
- template_sections["get_started_code"] = (
- "import joblib\nimport json\nimport pandas as pd\nclf ="
- f' joblib.load({model_file})\nwith open("config.json") as'
- " f:\n "
- " config ="
- " json.load(f)\n"
- 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
- )
- if self.model_diagram is True:
- model_plot_div = re.sub(
- r"\n\s+", "", str(estimator_html_repr(self.get_model()))
- )
- if model_plot_div.count("sk-top-container") == 1:
- model_plot_div = model_plot_div.replace(
- "sk-top-container", 'sk-top-container" style="overflow: auto;'
- )
- model_plot: str | None = model_plot_div
+ def _add_metrics(
+ self,
+ section: str,
+ metrics: dict[str, str | float | int],
+ description: str | None,
+ ) -> None:
+ """Format the collected metrics as a table and add it to ``section``."""
+ if self._metrics:
+ data_transposed = zip(*self._metrics.items()) # make column oriented
+ inp = {key: val for key, val in zip(["Metric", "Value"], data_transposed)}
+ table = TableSection(inp).format()
else:
- model_plot = None
- template_sections["eval_results"] = tabulate(
- list(self._eval_results.items()),
- headers=["Metric", "Value"],
- tablefmt="github",
- )
+ # create empty table
+ table = TableSection({"Metric": [], "Value": []}).format()
- # if template path is not given, use default
- if template_sections.get("template_path") is None:
- template_sections["template_path"] = str(
- Path(root[0]) / "card" / "default_template.md"
- )
+ if description:
+ content = f"{description}\n\n{table}"
+ else:
+ content = table
- # copying the template so that the original template is not touched/changed
- # append plot_name if any plots are provided, at the end of the template
- with tempfile.TemporaryDirectory() as tmpdirname:
- shutil.copyfile(
- template_sections["template_path"],
- f"{tmpdirname}/temporary_template.md",
- )
- # create a temporary template with the additional plots
- template_sections["template_path"] = f"{tmpdirname}/temporary_template.md"
- # add extra sections at the end of the template
- with open(template_sections["template_path"], "a") as template:
- if self._extra_sections:
- template.write("\n\n# Additional Content\n")
-
- for key, val in self._extra_sections:
- formatted = val.format()
- template.write(f"\n## {key}\n\n{formatted}\n")
-
- card = ModelCard.from_template(
- card_data=self.metadata,
- hyperparameter_table=self._extract_estimator_config(),
- model_plot=model_plot,
- **template_sections,
- )
- return card
+ self._add_single(section, content)
- def save(self, path: str | Path) -> None:
- """Save the model card.
+ def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]:
+ """Yield a ``metadata.<key>=<value>,`` repr string for each metadata entry"""
+ for key, val in metadata.to_dict().items() if metadata else {}:
+ yield aRepr.repr(f"metadata.{key}={val},").strip('"').strip("'")
- This method renders the model card in markdown format and then saves it
- as the specified file.
+ def _generate_content(
+ self, data: dict[str, Section], depth: int = 1
+ ) -> Iterator[str]:
+ """Yield title and (formatted) contents.
- Parameters
- ----------
- path: str, or Path
- Filepath to save your card.
+ Recursively go through the data and consecutively yield the title with
+ the appropriate number of "#"s (markdown format), then the associated
+ content.
- Notes
- -----
- The keys in model card metadata can be seen `here
- `__.
"""
- card = self._generate_card()
- card.save(path)
+ for val in data.values():
+ title = f"{depth * '#'} {val.title}"
+ yield title
- def render(self) -> str:
- """Render the final model card as a string.
+ if isinstance(val.content, str):
+ yield val.content
+ else: # is a Formattable
+ yield val.content.format()
- Returns
- -------
- card : str
- The rendered model card with all placeholders filled and all extra
- sections inserted.
- """
- card = self._generate_card()
- return str(card)
+ if val.subsections:
+ yield from self._generate_content(val.subsections, depth=depth + 1)
- def _extract_estimator_config(self) -> str:
- """Extracts estimator hyperparameters and renders them into a vertical table.
+ def _iterate_content(
+ self, data: dict[str, Section], parent_section: str = ""
+ ) -> Iterator[tuple[str, Formattable | str]]:
+ """Yield tuples of title and (non-formatted) content."""
+ for val in data.values():
+ if parent_section:
+ title = "/".join((parent_section, val.title))
+ else:
+ title = val.title
- Returns
- -------
- str:
- Markdown table of hyperparameters.
- """
- hyperparameter_dict = self.get_model().get_params(deep=True)
- return _clean_table(
- tabulate(
- list(hyperparameter_dict.items()),
- headers=["Hyperparameter", "Value"],
- tablefmt="github",
- )
- )
+ yield title, val.content
+
+ if val.subsections:
+ yield from self._iterate_content(val.subsections, parent_section=title)
@staticmethod
- def _strip_blank(text) -> str:
- # remove new lines and multiple spaces
+ def _format_repr(text: str) -> str:
+ # Remove new lines, multiple spaces, quotation marks, and cap line length
text = text.replace("\n", " ")
text = re.sub(r"\s+", r" ", text)
- return text
+ return aRepr.repr(text).strip('"').strip("'")
def __str__(self) -> str:
return self.__repr__()
def __repr__(self) -> str:
- # create repr for model
+ # repr for the model
model = getattr(self, "model", None)
if model:
- model_str = self._strip_blank(repr(self.get_model()))
- model_repr = aRepr.repr(f" model={model_str},").strip('"').strip("'")
+ model_repr = self._format_repr(f"model={repr(self.get_model())},")
else:
model_repr = None
- # metadata
+ # repr for metadata
metadata_reprs = []
for key, val in self.metadata.to_dict().items() if self.metadata else {}:
if key == "widget":
- metadata_reprs.append(" metadata.widget={...},")
+ metadata_reprs.append("metadata.widget={...},")
continue
- metadata_reprs.append(
- aRepr.repr(f" metadata.{key}={val},").strip('"').strip("'")
- )
+ metadata_reprs.append(self._format_repr(f"metadata.{key}={val},"))
metadata_repr = "\n".join(metadata_reprs)
- # normal sections
- template_reprs = []
- for key, val in self._template_sections.items():
- val = self._strip_blank(repr(val))
- template_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'"))
- template_repr = "\n".join(template_reprs)
-
- # figures
- figure_reprs = []
- for key, val in self._extra_sections:
- val = self._strip_blank(repr(val))
- figure_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'"))
- figure_repr = "\n".join(figure_reprs)
+ # repr for contents
+ content_reprs = []
+ for title, content in self._iterate_content(self._data):
+ if not content:
+ continue
+ if isinstance(content, str) and content.rstrip("`").rstrip().endswith(
+ CONTENT_PLACEHOLDER
+ ):
+ # if content is just some default text, no need to show it
+ continue
+ content_reprs.append(self._format_repr(f"{title}={content},"))
+ content_repr = "\n".join(content_reprs)
+ # combine all parts
complete_repr = "Card(\n"
if model_repr:
- complete_repr += model_repr + "\n"
+ complete_repr += textwrap.indent(model_repr, " ") + "\n"
if metadata_reprs:
- complete_repr += metadata_repr + "\n"
- if template_repr:
- complete_repr += template_repr + "\n"
- if figure_repr:
- complete_repr += figure_repr + "\n"
+ complete_repr += textwrap.indent(metadata_repr, " ") + "\n"
+ if content_reprs:
+ complete_repr += textwrap.indent(content_repr, " ") + "\n"
complete_repr += ")"
return complete_repr
+
+ def _generate_card(self) -> Iterator[str]:
+ """Yield sections of the model card, including the metadata."""
+ if self.metadata.to_dict():
+ yield f"---\n{self.metadata.to_yaml()}\n---"
+
+ for line in self._generate_content(self._data):
+ if line:
+ yield "\n" + line
+
+ # add an empty line at the end
+ yield ""
+
+ def save(self, path: str | Path) -> None:
+ """Save the model card.
+
+ This method renders the model card in markdown format and then saves it
+ as the specified file.
+
+ Parameters
+ ----------
+ path: str, or Path
+ Filepath to save your card.
+
+ Notes
+ -----
+ The keys in model card metadata can be seen `here
+ <https://huggingface.co/docs/hub/model-cards#model-card-metadata>`__.
+ """
+ with open(path, "w", encoding="utf-8") as f:
+ f.write("\n".join(self._generate_card()))
+
+ def render(self) -> str:
+ """Render the final model card as a string.
+
+ Returns
+ -------
+ result : str
+ The rendered model card with all placeholders filled and all extra
+ sections inserted.
+ """
+ return "\n".join(self._generate_card())
diff --git a/skops/card/_templates.py b/skops/card/_templates.py
new file mode 100644
index 00000000..d30a39a7
--- /dev/null
+++ b/skops/card/_templates.py
@@ -0,0 +1,167 @@
+"""Templates for model cards
+
+To add a new template, define it as a dictionary where the key is the section
+and the value is the content of the section. If the content is empty but should
+be filled by the user, set it to be the ``CONTENT_PLACEHOLDER``.
+
+After defining the template itself, add it as another enum value in the
+``Templates`` enum.
+
+Finally, if there is a corresponding section in the new template, some methods
+on the ``Card`` class should be adjusted to make use of the template. First of
+all, ``_fill_default_sections`` should be used to populate the model card with
+the template.
+
+Furthermore, some specific methods might require adjusting. For example, the
+``Card._add_hyperparams`` method will add a table of model hyperparameters, but
+it needs to know in what section to put them. So if the template contains a
+corresponding section, modify the method to put the hyperparameters inside that
+section.
+
+"""
+
+from enum import Enum
+
+
+class Templates(Enum):
+ skops = "skops"
+
+
+CONTENT_PLACEHOLDER = "[More Information Needed]"
+"""When there is a section but content has yet to be added by the user, show
+this"""
+
+# fmt: off
+SKOPS_TEMPLATE = {
+ "Model description": CONTENT_PLACEHOLDER,
+ "Model description/Intended uses & limitations": CONTENT_PLACEHOLDER,
+ "Model description/Training Procedure": "",
+ "Model description/Training Procedure/Hyperparameters": CONTENT_PLACEHOLDER,
+ "Model description/Training Procedure/Model Plot": "The model plot is below.",
+ "Model description/Evaluation Results": CONTENT_PLACEHOLDER,
+ "How to Get Started with the Model": CONTENT_PLACEHOLDER,
+ "Model Card Authors": (
+ f"This model card is written by following authors:\n\n{CONTENT_PLACEHOLDER}"
+ ),
+ "Model Card Contact": (
+ "You can contact the model card authors through following channels:\n"
+ f"{CONTENT_PLACEHOLDER}"
+ ),
+ "Citation": (
+ "Below you can find information related to citation.\n\n**BibTeX:**\n```\n"
+ f"{CONTENT_PLACEHOLDER}\n```"
+ ),
+}
+
+# The template below corresponds to the HF Hub default template, but is geared
+# towards deep learning models, especially language models, and thus is not a
+# good fit for most sklearn models.
+_HUB_TEMPLATE = {
+ "Model Card": "",
+ # Provide a quick summary of what the model is/does.
+ "Model Details": "",
+ "Model Details/Model Description": "",
+ # Provide a longer summary of what this model is.
+ "Model Details/Model Description/Developed by": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/Shared by [optional]": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/Model type": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/Language(s) (NLP)": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/License": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/Finetuned from model [optional]": CONTENT_PLACEHOLDER,
+ "Model Details/Model Description/Resources for more information": CONTENT_PLACEHOLDER,
+
+ "Uses": "",
+ # Address questions around how the model is intended to be used, including
+ # the foreseeable users of the model and those affected by the model.
+ "Uses/Direct Use": CONTENT_PLACEHOLDER,
+ # This section is for the model use without fine-tuning or plugging into a
+ # larger ecosystem/app.
+ "Uses/Downstream Use [optional]": CONTENT_PLACEHOLDER,
+ # This section is for the model use when fine-tuned for a task, or when
+ # plugged into a larger ecosystem/app.
+ "Uses/Out-of-Scope Use": CONTENT_PLACEHOLDER,
+ # This section addresses misuse, malicious use, and uses that the model will
+ # not work well for.
+
+ "Bias, Risks, and Limitations": CONTENT_PLACEHOLDER,
+ # This section is meant to convey both technical and sociotechnical
+ # limitations.
+ "Bias, Risks, and Limitations/Recommendations": (
+ "Users (both direct and downstream) should be made aware of the risks, biases "
+ "and limitations of the model. More information needed for further "
+ "recommendations."
+ ),
+ # This section is meant to convey recommendations with respect to the bias,
+ # risk, and technical limitations.
+
+ "Training Details": "",
+ "Training Details/Training Data": CONTENT_PLACEHOLDER,
+ # This should link to a Data Card, perhaps with a short stub of information
+ # on what the training data is all about as well as documentation related to
+ # data pre-processing or additional filtering.
+ "Training Details/Training Procedure [optional]": "",
+ # This relates heavily to the Technical Specifications. Content here should
+ # link to that section when it is relevant to the training procedure.
+ "Training Details/Training Procedure [optional]/Preprocessing": CONTENT_PLACEHOLDER,
+ "Training Details/Training Procedure [optional]/Speeds, Sizes, Times": CONTENT_PLACEHOLDER,
+ # This section provides information about throughput, start/end time,
+ # checkpoint size if relevant, etc.
+
+ "Evaluation": "",
+ # This section describes the evaluation protocols and provides the results.
+ "Evaluation/Testing Data, Factors & Metrics": "",
+ "Evaluation/Testing Data, Factors & Metrics/Testing Data": CONTENT_PLACEHOLDER,
+ # This should link to a Data Card if possible
+ "Evaluation/Testing Data, Factors & Metrics/Factors": CONTENT_PLACEHOLDER,
+ # These are the things the evaluation is disaggregating by, e.g.,
+ # subpopulations or domains.
+ "Evaluation/Testing Data, Factors & Metrics/Metrics": CONTENT_PLACEHOLDER,
+ # These are the evaluation metrics being used, ideally with a description of
+ # why.
+ "Evaluation/Results": CONTENT_PLACEHOLDER,
+
+ "Model Examination [optional]": CONTENT_PLACEHOLDER,
+ # Relevant interpretability work for the model goes here.
+
+ "Environmental Impact": (
+ "Carbon emissions can be estimated using the "
+ "[Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) "
+ "presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)."
+ ),
+ # Total emissions (in grams of CO2eq) and additional considerations, such as
+ # electricity usage, go here. Edit the suggested text below accordingly.
+ "Environmental Impact/Hardware Type": CONTENT_PLACEHOLDER,
+ "Environmental Impact/Hours used": CONTENT_PLACEHOLDER,
+ "Environmental Impact/Cloud Provider": CONTENT_PLACEHOLDER,
+ "Environmental Impact/Compute Region": CONTENT_PLACEHOLDER,
+ "Environmental Impact/Carbon Emitted": CONTENT_PLACEHOLDER,
+
+ "Technical Specifications [optional]": "",
+ "Technical Specifications [optional]/Model Architecture and Objective": CONTENT_PLACEHOLDER,
+ "Technical Specifications [optional]/Compute Infrastructure": CONTENT_PLACEHOLDER,
+ "Technical Specifications [optional]/Compute Infrastructure/Hardware": CONTENT_PLACEHOLDER,
+ "Technical Specifications [optional]/Compute Infrastructure/Software": CONTENT_PLACEHOLDER,
+
+ "Citation [optional]": "",
+ # If there is a paper or blog post introducing the model, the APA and Bibtex
+ # information for that should go in this section.
+ "Citation [optional]/BibTeX": CONTENT_PLACEHOLDER,
+ "Citation [optional]/APA": CONTENT_PLACEHOLDER,
+
+ "Glossary [optional]": "",
+ # If relevant, include terms and calculations in this section that can help
+ # readers understand the model or model card.
+
+ "More Information [optional]": CONTENT_PLACEHOLDER,
+ "Model Card Authors [optional]": CONTENT_PLACEHOLDER,
+ "Model Card Contact": CONTENT_PLACEHOLDER,
+ "How to Get Started with the Model": f"""Use the code below to get started with the model.
+
+
+ Click to expand
+
+{CONTENT_PLACEHOLDER}
+
+ """,
+}
+# fmt: on
diff --git a/skops/card/default_template.md b/skops/card/default_template.md
index edbc8d49..91141dfe 100644
--- a/skops/card/default_template.md
+++ b/skops/card/default_template.md
@@ -29,7 +29,7 @@ The model plot is below.
{{ model_plot }}
-## Evaluation Results
+## Evaluation Results
You can find the details about evaluation process and the evaluation results.
diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
index 411fec11..92e1c68c 100644
--- a/skops/card/tests/test_card.py
+++ b/skops/card/tests/test_card.py
@@ -1,7 +1,8 @@
-import copy
import os
import pickle
+import re
import tempfile
+import textwrap
from pathlib import Path
import matplotlib.pyplot as plt
@@ -11,11 +12,16 @@
from huggingface_hub import CardData, metadata_load
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
-import skops
from skops import hub_utils
from skops.card import Card, metadata_from_config
-from skops.card._model_card import PlotSection, TableSection, _load_model
+from skops.card._model_card import (
+ SKOPS_TEMPLATE,
+ PlotSection,
+ TableSection,
+ _load_model,
+)
from skops.io import dump
@@ -135,12 +141,82 @@ def test_save_model_card(destination_path, model_card):
assert (Path(destination_path) / "README.md").exists()
-def test_hyperparameter_table(destination_path, model_card):
- model_card = model_card.render()
- assert "fit_intercept" in model_card
+CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore
+
+
+class TestAddModelPlot:
+ """Tests for the sklearn model repr"""
+
+ def test_default(self, model_card):
+ result = model_card.select(
+ "Model description/Training Procedure/Model Plot"
+ ).content
+ # don't compare whole text, as it's quite long and non-deterministic
+ assert result.startswith("The model plot is below.\n\n