diff --git a/docs/changes.rst b/docs/changes.rst index 7e208380..f1aa7739 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -21,6 +21,11 @@ v0.4 :pr:`242` by `Merve Noyan`_. - Persistence now supports bytes and bytearrays, added tests to verify that LightGBM, XGBoost, and CatBoost work now. :pr:`244` by `Benjamin Bossan`_. +- :class:`.card.Card` now allows adding content to existing sections, using a + ``/`` to separate the subsections. E.g. use ``card.add(**{"Existing + section/New section": "content"})`` to add "content" to a new subsection called + "New section" in an existing section called "Existing section". :pr:`203` by + `Benjamin Bossan`_. v0.3 ---- diff --git a/docs/model_card.rst b/docs/model_card.rst index c1fe0fac..7a6c124d 100644 --- a/docs/model_card.rst +++ b/docs/model_card.rst @@ -42,32 +42,30 @@ to touch it yourself. The markdown part does not necessarily need to follow any specification in terms of information passed, which gives the user a lot of flexibility. The -markdown part of the ``README.md`` file is generated from a Jinja template -with slots that you can inject your content in. ``skops`` has a default -template which includes the following slots for free text sections: - -- ``"model_description"``: A description of the model. -- ``"limitations"``: Intended use for the model, limitations and potential - biases. This section should also include risks of using models in certain - domains if relevant. -- ``"get_started_code"``: Code the user can run to load and use the model. -- ``"model_card_authors"``: Authors of the model card. This section includes - authors of the model card, while ``"citation_bibtex"`` includes citations - related to the model if relevant.
-- ``"model_card_contact"``: Contact information of people whom can be reached +markdown part of the ``README.md`` file comes with a couple of defaults provided +by ``skops``, which include the following slots for free text sections: + +- ``"Model description"``: A description of the model. +- ``"Intended uses & limitations"``: Intended use for the model, limitations and + potential biases. This section should also include risks of using models in + certain domains if relevant. +- ``"How to Get Started with the Model"``: Code the user can run to load and use + the model. +- ``"Model Card Authors"``: Authors of the model card. This section includes + authors of the model card. +- ``"Model Card Contact"``: Contact information of people who can be reached out, in case of questions about the model or the model card. -- ``"citation_bibtex"``: Bibtex style citations for the model or resources used - to train the model. -- ``"eval_methods"``: Details about evaluation process of the model. -- ``"eval_results"``: Evaluation results that are later parsed as a table by - :class:`skops.card.Card`. +- ``"Citation"``: Bibtex style citations for the model or resources used to + train the model. +- ``"Evaluation Results"``: Evaluation results that are later parsed as a table + by :class:`skops.card.Card`. The template also contains the following sections that are automatically generated by ``skops``. -- ``"hyperparameter_table"``: Hyperparameters of the model. -- ``"model_plot"``: A diagram of the model, most relevant in case the model is +- ``"Hyperparameters"``: Hyperparameters of the model. +- ``"Model Plot"``: A diagram of the model, most relevant in case the model is a complex scikit-learn :class:`~sklearn.pipeline.Pipeline`. Furthermore, it is possible to add plots and tables to the model card.
To add @@ -77,5 +75,31 @@ dictionaries with the key being the header and the values being list of row entries, or a pandas ``DataFrame``; use the :meth:`.Card.add_table` method for this. +To add content to an existing subsection, or create a new subsection, use a +``"/"`` to indicate the subsection. E.g. let's assume you would like to add a +subsection called ``"Figures"`` to the existing section ``"Model description"``, +as well as adding some subsections with plots below that, you can call the +:meth:`Card.add` method like this: + +.. code-block:: python + + card.add(**{"Model description/Figures": "Here are some nice figures"}) + card.add_plot(**{ + "Model description/Figures/Confusion Matrix": "path-to-confusion-matrix.png", + "Model description/Figures/ROC": "path-to-roc.png", + }) + +Furthermore, you can select existing sections (as well as their subsections) +using :meth:`Card.select`, and you can delete sections using +:meth:`Card.delete`: + +.. code-block:: python + + section = card.select("Model description/Figures") + print(section.content) # 'Here are some nice figures' + print(section.subsections) + card.delete("Model description/Figures/ROC") + + To see how you can use the API in ``skops`` to create a model card, please refer to :ref:`sphx_glr_auto_examples_plot_model_card.py`. diff --git a/examples/plot_model_card.py b/examples/plot_model_card.py index 4acdfd1b..7a5ff3b3 100644 --- a/examples/plot_model_card.py +++ b/examples/plot_model_card.py @@ -29,7 +29,8 @@ ) from sklearn.model_selection import HalvingGridSearchCV, train_test_split -from skops import card, hub_utils +from skops import hub_utils +from skops.card import Card, metadata_from_config # %% # Data @@ -91,7 +92,7 @@ # :func:`.hub_utils.init` above. We will see below how we can populate the model # card with useful information. 
-model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo))) +model_card = Card(model, metadata=metadata_from_config(Path(local_repo))) # %% # Add more information @@ -103,17 +104,19 @@ model_card.metadata.license = "mit" limitations = "This model is not ready to be used in production." model_description = ( - "This is a HistGradientBoostingClassifier model trained on breast cancer dataset." - " It's trained with Halving Grid Search Cross Validation, with parameter grids on" - " max_leaf_nodes and max_depth." + "This is a `HistGradientBoostingClassifier` model trained on breast cancer " + "dataset. It's trained with `HalvingGridSearchCV`, with parameter grids on " + "`max_leaf_nodes` and `max_depth`." ) model_card_authors = "skops_user" -citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}" +citation_bibtex = "**BibTeX**\n\n```\n@inproceedings{...,year={2020}}\n```" model_card.add( - citation_bibtex=citation_bibtex, - model_card_authors=model_card_authors, - limitations=limitations, - model_description=model_description, + **{ + "Citation": citation_bibtex, + "Model Card Authors": model_card_authors, + "Model description": model_description, + "Model description/Intended uses & limitations": limitations, + } ) # %% @@ -132,10 +135,10 @@ y_pred = model.predict(X_test) eval_descr = ( - "The model is evaluated on test data using accuracy and F1-score with macro" - " average." + "The model is evaluated on test data using accuracy and F1-score with " + "macro average." 
) -model_card.add(eval_method=eval_descr) +model_card.add(**{"Model description/Evaluation Results": eval_descr}) accuracy = accuracy_score(y_test, y_pred) f1 = f1_score(y_test, y_pred, average="micro") @@ -146,7 +149,9 @@ disp.plot() disp.figure_.savefig(Path(local_repo) / "confusion_matrix.png") -model_card.add_plot(**{"Confusion matrix": "confusion_matrix.png"}) +model_card.add_plot( + **{"Model description/Evaluation Results/Confusion Matrix": "confusion_matrix.png"} +) cv_results = model.cv_results_ clf_report = classification_report( @@ -160,8 +165,8 @@ model_card.add_table( folded=True, **{ - "Hyperparameter search results": cv_results, - "Classification report": clf_report, + "Model description/Evaluation Results/Hyperparameter search results": cv_results, + "Model description/Evaluation Results/Classification report": clf_report, }, ) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 6073e863..2adafa62 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -1,22 +1,21 @@ from __future__ import annotations -import copy import json import re -import shutil -import tempfile +import textwrap import zipfile -from dataclasses import dataclass +from collections.abc import Mapping +from dataclasses import dataclass, field from pathlib import Path from reprlib import Repr -from typing import Any, Optional, Union +from typing import Any, Iterator, Literal, Protocol, Sequence, Union import joblib -from huggingface_hub import ModelCard, ModelCardData +from huggingface_hub import ModelCardData from sklearn.utils import estimator_html_repr from tabulate import tabulate # type: ignore -import skops +from skops.card._templates import CONTENT_PLACEHOLDER, SKOPS_TEMPLATE, Templates from skops.io import load # Repr attributes can be used to control the behavior of repr @@ -25,6 +24,13 @@ aRepr.maxstring = 79 +VALID_TEMPLATES = {item.value for item in Templates} +NEED_SECTION_ERR_MSG = ( + "You are trying to {action} but you're using 
a custom template, please pass the " + "'section' argument to determine where to put the content" +) + + def wrap_as_details(text: str, folded: bool) -> str: if not folded: return text @@ -35,7 +41,7 @@ def _clean_table(table: str) -> str: # replace line breaks "\n" with html tag
, however, leave end-of-line # line breaks (eol_lb) intact eol_lb = "|\n" - placeholder = "$%!?" # arbitrary sting that never appears naturally + placeholder = "$%!?" # arbitrary string that never appears naturally table = ( table.replace(eol_lb, placeholder) .replace("\n", "
") @@ -64,7 +70,7 @@ def __repr__(self) -> str: class TableSection: """Adds a table to the model card""" - table: dict[str, list[Any]] + table: Mapping[str, Sequence[Any]] folded: bool = False def __post_init__(self) -> None: @@ -76,17 +82,11 @@ def __post_init__(self) -> None: self._is_pandas_df = False if self._is_pandas_df: - if self.table.empty: # type: ignore - raise ValueError("Empty table added") + ncols = len(self.table.columns) # type: ignore else: ncols = len(self.table) - if ncols == 0: - raise ValueError("Empty table added") - - key = next(iter(self.table.keys())) - nrows = len(self.table[key]) - if nrows == 0: - raise ValueError("Empty table added") + if ncols == 0: + raise ValueError("Trying to add table with no columns") def format(self) -> str: if self._is_pandas_df: @@ -165,6 +165,129 @@ def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData: return card_data +def split_subsection_names(key: str) -> list[str]: + r"""Split a string containing multiple sections into a list of strings for + each. + + The separator is ``"/"``. To avoid splitting on ``"/"``, escape it using + ``"\\/"``. + + Examples + -------- + >>> split_subsection_names("Section A") + ['Section A'] + >>> split_subsection_names("Section A/Section B/Section C") + ['Section A', 'Section B', 'Section C'] + >>> split_subsection_names("A section containg \\/ a slash") + ['A section containg / a slash'] + >>> split_subsection_names("Spaces are / stripped") + ['Spaces are', 'stripped'] + + Parameters + ---------- + key : str + The section name consisting potentially of multiple subsections. It has + to be ensured beforhand that this is not an empty string. + + Returns + ------- + parts : list of str + The individual (sub)sections. + + """ + placeholder = "$%!?" 
# arbitrary string that never appears naturally + key = key.replace("\\/", placeholder) + parts = (part.strip() for part in key.split("/")) + return [part.replace(placeholder, "/") for part in parts] + + +def _getting_started_code( + file_name: str, model_format: Literal["pickle", "skops"], indent=" " +) -> list[str]: + # get lines of code required to load the model + lines = [ + "import json", + "import pandas as pd", + ] + if model_format == "skops": + lines += ["import skops.io as sio"] + else: + lines += ["import joblib"] + + if model_format == "skops": + lines += [f'model = sio.load("{file_name}")'] + else: # pickle + lines += [f'model = joblib.load("{file_name}")'] + + lines += [ + 'with open("config.json") as f:', + indent + "config = json.load(f)", + 'model.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))', + ] + return lines + + +@dataclass +class Section: + """Building block of the model card. + + The model card is represented internally as a dict with keys being strings + and values being Sections. The key is identical to the section title. + + Additionally, the section may hold content in the form of strings (can be an + empty string) or a ``Formattable``, which is simply an object with a + ``format`` method that returns a string. + + Finally, the section can contain subsections, which again are dicts of + string keys and section values (the dict can be empty). Therefore, the model + card representation forms a tree structure, making use of the fact that dict + order is preserved. + + """ + + title: str + content: Formattable | str + subsections: dict[str, Section] = field(default_factory=dict) + + def select(self, key: str) -> Section: + """Return a subsection or subsubsection of this section + + Parameters + ---------- + key : str + The name of the (sub)section to select. When selecting a subsection, + either use a ``"/"`` in the name to separate the parent and child + sections, or chain multiple ``select`` calls.
+ + Returns + ------- + section : Section + A dataclass containing all information relevant to the selected + section. Those are the title, the content, and subsections (in a + dict). + + Raises + ------ + KeyError + If the given section name was not found, a ``KeyError`` is raised. + """ + section_names = split_subsection_names(key) + # check that no section name is empty + if not all(bool(name) for name in section_names): + msg = f"Section name cannot be empty but got '{key}'" + raise KeyError(msg) + + section = self + for section_name in section_names: + section = section.subsections[section_name] + return section + + +class Formattable(Protocol): + def format(self) -> str: + ... # pragma: no cover + + def _load_model(model: Any, trusted=False) -> Any: """Return a model instance. @@ -210,8 +333,7 @@ class Card: This class can be used to write information and plots to model card and save it. This class by default generates an interactive plot of the model and a - table of hyperparameters. The slots to be filled are defined in the markdown - template. + table of hyperparameters. Some sections are added by default. Parameters ---------- @@ -232,6 +354,17 @@ class Card: of the ``config.json`` file, which itself is created by :func:`skops.hub_utils.init`. + template: "skops", dict, or None (default="skops") + Whether to add default sections or not. The template can be a predefined + template, which at the moment can only be the string ``"skops"``, which + is a template provided by ``skops`` that is geared towards typical + sklearn models. If you don't want any prefilled sections, just pass + ``None``. If you want custom prefilled sections, pass a ``dict``, where + keys are the sections and values are the contents of the sections. Note + that when you use no template or a custom template, some methods will + not work, e.g. :meth:`Card.add_metrics`, since it's not clear where to + put the metrics when there is no template or a custom template. 
+ trusted: bool, default=False Passed to :func:`skops.io.load` if the model is a file path and it's a `skops` file. @@ -245,13 +378,6 @@ class Card: Metadata to be stored at the beginning of the saved model card, as metadata to be understood by the Hugging Face Hub. - Notes - ----- - The contents of the sections of the template can be set using - :meth:`Card.add` method. Plots can be added to the model card using - :meth:`Card.add_plot`. The key you pass to :meth:`Card.add_plot` will be - used as the header of the plot. - Examples -------- >>> from sklearn.metrics import ( @@ -264,20 +390,17 @@ class Card: >>> from pathlib import Path >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegression - >>> from skops import card + >>> from skops.card import Card >>> X, y = load_iris(return_X_y=True) >>> model = LogisticRegression(solver="liblinear", random_state=0).fit(X, y) - >>> model_card = card.Card(model) + >>> model_card = Card(model) >>> model_card.metadata.license = "mit" >>> y_pred = model.predict(X) >>> model_card.add_metrics(**{ ... "accuracy": accuracy_score(y, y_pred), ... "f1 score": f1_score(y, y_pred, average="micro"), ... }) - Card( - model=LogisticRegression(random_state=0, solver='liblinear'), - metadata.license=mit, - ) + Card(...) >>> cm = confusion_matrix(y, y_pred,labels=model.classes_) >>> disp = ConfusionMatrixDisplay( ... confusion_matrix=cm, @@ -288,64 +411,547 @@ class Card: >>> tmp_path = Path(tempfile.mkdtemp(prefix="skops-")) >>> disp.figure_.savefig(tmp_path / "confusion_matrix.png") ... - >>> model_card.add_plot(confusion_matrix="confusion_matrix.png") + >>> model_card.add_plot(**{ + ... "Model description/Confusion Matrix": tmp_path / "confusion_matrix.png" + ... }) + Card(...) + >>> # add new content to the existing section "Model description" + >>> model_card.add(**{"Model description": "This is the best model"}) + Card(...) 
+ >>> # add content to a new section + >>> model_card.add(**{"A new section": "Please rate my model"}) + Card(...) + >>> # add new subsection to an existing section by using "/" + >>> model_card.add(**{"Model description/Model name": "This model is called Bob"}) Card( model=LogisticRegression(random_state=0, solver='liblinear'), metadata.license=mit, - confusion_matrix='...confusion_matrix.png', + Model description=This is the best model, + Model description/Training Procedure/... | | warm_start | False | , + Model description/Training Procedure/..., + Model description/Evaluation Results=...ccuracy | 0.96 | | f1 score | 0.96 |, + Model description/Confusion Matrix=...confusion_matrix.png'), + Model description/Model name=This model is called Bob, + A new section=Please rate my model, ) + >>> # save the card to a README.md file >>> model_card.save(tmp_path / "README.md") """ def __init__( self, - model: Any, + model, model_diagram: bool = True, - metadata: Optional[ModelCardData] = None, + metadata: ModelCardData | None = None, + template: Literal["skops"] | dict[str, str] | None = "skops", trusted: bool = False, ) -> None: self.model = model self.model_diagram = model_diagram - self._eval_results = {} # type: ignore - self._template_sections: dict[str, str] = {} - self._extra_sections: list[tuple[str, Any]] = [] self.metadata = metadata or ModelCardData() + self.template = template self.trusted = trusted + self._data: dict[str, Section] = {} + self._metrics: dict[str, str | float | int] = {} + + self._populate_template() + + def _populate_template(self): + """If initialized with a template, use it to populate the card.""" + if not self.template: + return + + if isinstance(self.template, str) and (self.template not in VALID_TEMPLATES): + valid_templates = ", ".join(f"'{val}'" for val in sorted(VALID_TEMPLATES)) + msg = ( + f"Unknown template '{self.template}', " + f"template must be one of the following values: {valid_templates}" + ) + raise ValueError(msg) + + if 
self.template == Templates.skops.value: + self.add(**SKOPS_TEMPLATE) + # for the skops template, automatically add some default sections + self.add_model_plot() + self.add_hyperparams() + self.add_get_started_code() + elif isinstance(self.template, Mapping): + self.add(**self.template) + def get_model(self) -> Any: - """Returns sklearn estimator object if ``Path``/``str`` - is provided. + """Returns sklearn estimator object. + + If the ``model`` is already loaded, return it as is. If the ``model`` + attribute is a ``Path``/``str``, load the model and return it. Returns ------- - model : Object - Model instance. + model : BaseEstimator + The model instance. + """ model = _load_model(self.model, self.trusted) + # Ideally, we would only call the method below if we *know* that the + # model has changed, but at the moment we have no way of knowing that return model - def add(self, **kwargs: str) -> "Card": - """Takes values to fill model card template. + def add(self, **kwargs: str | Formattable) -> Card: + """Add new section(s) to the model card. + + Add one or multiple sections to the model card. The section names are + taken from the keys and the contents are taken from the values. + + To add to an existing section, use a ``"/"`` in the section name, e.g.: + + ``card.add(**{"Existing section/New section": "content"})``. + + If the parent section does not exist, it will be added automatically. + + To add a section with ``"/"`` in its title (i.e. not intended as a + subsection), escape the slash like so, ``"\\/"``, e.g.: + + ``card.add(**{"A section with\\/a slash in the title": "content"})``. + + If a section of the given name already exists, its content will be + overwritten. Parameters ---------- **kwargs : dict - Parameters to be set for the model card. These parameters - need to be sections of the underlying `jinja` template used. + The keys of the dictionary serve as the section title and the values + as the section content.
It's possible to add to existing sections. + + Returns + ------- + self : object + Card object. + + """ + for key, val in kwargs.items(): + self._add_single(key, val) + return self + + def _select( + self, subsection_names: Sequence[str], create: bool = True + ) -> dict[str, Section]: + """Select a single section from the data. + + Parameters + ---------- + subsection_names: list of str + The subsection names, already split into individual subsections. + + create: bool (default=True) + Whether to create the subsection if it does not already exist or + not. + + Returns + ------- + section: dict of Section + A dict mapping the section key (identical to the title) to the + actual ``Section``, which is a dataclass that contains the actual + data of the section. + + Raises + ------ + KeyError + If the section does not exist and ``create=False``, raises a + ``KeyError``. + + """ + section = self._data + if not subsection_names: + return section + + for subsection_name in subsection_names: + section_maybe = section.get(subsection_name) + + # there are already subsections + if section_maybe is not None: + section = section_maybe.subsections + continue + + if create: + # no subsection, create + entry = Section(title=subsection_name, content="") + section[subsection_name] = entry + section = entry.subsections + else: + raise KeyError(f"Section {subsection_name} does not exist") + + return section + + def select(self, key: str) -> Section: + """Select a section from the model card. + + To select a subsection of an existing section, use a ``"/"`` in the + section name, e.g.: + + ``card.select("Main section/Subsection")``. + + Alternatively, multiple ``select`` calls can be chained: + + ``card.select("Main section").select("Subsection")``. + + Parameters + ---------- + key : str + The name of the (sub)section to select. When selecting a subsection, + either use a ``"/"`` in the name to separate the parent and child + sections, or chain multiple ``select`` calls.
+ + Returns + ------- + section : Section + A dataclass containing all information relevant to the selected + section. Those are the title, the content, and subsections (in a + dict). + + Raises + ------ + KeyError + If the given section name was not found, a ``KeyError`` is raised. + + """ + if not key: + msg = f"Section name cannot be empty but got '{key}'" + raise KeyError(msg) + + *subsection_names, leaf_node_name = split_subsection_names(key) + + if not leaf_node_name: + msg = f"Section name cannot be empty but got '{key}'" + raise KeyError(msg) + + parent_section = self._select(subsection_names, create=False) + return parent_section[leaf_node_name] + + def delete(self, key: str | Sequence[str]) -> None: + """Delete a section from the model card. + + To delete a subsection of an existing section, use a ``"/"`` in the + section name, e.g.: + + ``card.delete("Existing section/New section")``. + + Alternatively, a list of strings can be passed: + + ``card.delete(["Existing section", "New section"])``. + + Parameters + ---------- + key : str or list of str + The name of the (sub)section to delete. When deleting a subsection, + either use a ``"/"`` in the name to separate the parent and child + sections, or pass a list of strings. + + Raises + ------ + KeyError + If the given section name was not found, a ``KeyError`` is raised. + + """ + if not key: + msg = f"Section name cannot be empty but got '{key}'" + raise KeyError(msg) + + if isinstance(key, str): + *subsection_names, leaf_node_name = split_subsection_names(key) + else: + *subsection_names, leaf_node_name = key + + if not leaf_node_name: + msg = f"Section name cannot be empty but got '{key}'" + raise KeyError(msg) + + parent_section = self._select(subsection_names, create=False) + del parent_section[leaf_node_name] + + def _add_single(self, key: str, val: Formattable | str) -> Section: + """Add a single section. + + If the (sub)section does not exist, it is created.
Otherwise, the + existing (sub)section is modified. + + Parameters + ---------- + key: str + The name of the (sub)section. + + val: str or Formattable + The value to assign to the (sub)section. + + """ + *subsection_names, leaf_node_name = split_subsection_names(key) + section = self._select(subsection_names) + + if leaf_node_name in section: + # entry exists, only overwrite content + section[leaf_node_name].content = val + else: + # entry does not exist, create a new one + section[leaf_node_name] = Section(title=leaf_node_name, content=val) + + return section[leaf_node_name] + + def add_model_plot( + self, + section: str | None = None, + description: str | None = None, + ) -> Card: + """Add a model plot + + Use sklearn model visualization to create a diagram of the model. + See the `sklearn model visualization docs + <https://scikit-learn.org/stable/modules/compose.html#visualizing-composite-estimators>`_. + + The model diagram is not added if the card class was instantiated with + ``model_diagram=False``. + + Parameters + ---------- + section : str or None, default=None + The section that the model plot should be added to. If you're using + the default skops template, you can leave this parameter as + ``None``, otherwise you have to indicate the section. If the section + does not exist, it will be created for you. + + description : str or None, default=None + An optional description to be added before the model plot. If you're + using the default skops template, a standard text is used. Pass a + string here if you want to use your own text instead. Leave this + empty to not add any description. + + Returns + ------- + self : object + Card object. + """ + if not self.model_diagram: + return self + + if section is None: + if self.template == Templates.skops.value: + section = "Model description/Training Procedure/Model Plot" + else: + msg = NEED_SECTION_ERR_MSG.format(action="add a model plot") + raise ValueError(msg) + + if description is None: + if self.template == Templates.skops.value: + description = "The model plot is below."
+ + self._add_model_plot(self.get_model(), section=section, description=description) + + return self + + def _add_model_plot( + self, model: Any, section: str, description: str | None + ) -> None: + """Add model plot section + + The model should be a loaded sklearn model, not a path. + + """ + model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(model))) + if model_plot_div.count("sk-top-container") == 1: + model_plot_div = model_plot_div.replace( + "sk-top-container", 'sk-top-container" style="overflow: auto;' + ) + + if description: + content = f"{description}\n\n{model_plot_div}" + else: + content = model_plot_div + + self._add_single(section, content) + + def add_hyperparams( + self, section: str | None = None, description: str | None = None + ) -> Card: + """Add the model's hyperparameters as a table + + Parameters + ---------- + section : str or None, default=None + The section that the hyperparameters should be added to. If you're + using the default skops template, you can leave this parameter as + ``None``, otherwise you have to indicate the section. If the section + does not exist, it will be created for you. + + description : str or None, default=None + An optional description to be added before the hyperparameters. If + you're using the default skops template, a standard text is used. + Pass a string here if you want to use your own text instead. Leave + this empty to not add any description. + + Returns + ------- + self : object + Card object. + + """ + if section is None: + if self.template == Templates.skops.value: + section = "Model description/Training Procedure/Hyperparameters" + else: + msg = NEED_SECTION_ERR_MSG.format(action="add model hyperparameters") + raise ValueError(msg) + + if description is None: + if self.template == Templates.skops.value: + description = "The model is trained with below hyperparameters."
+ + self._add_hyperparams( + self.get_model(), section=section, description=description + ) + return self + + def _add_hyperparams( + self, model: Any, section: str, description: str | None + ) -> None: + """Add hyperparameter section. + + The model should be a loaded sklearn model, not a path. + + """ + hyperparameter_dict = model.get_params(deep=True) + table = _clean_table( + tabulate( + list(hyperparameter_dict.items()), + headers=["Hyperparameter", "Value"], + tablefmt="github", + ) + ) + table_folded = textwrap.dedent( + """ +
+ Click to expand + + {} + +
""" + ).format(table) + + if description: + content = f"{description}\n{table_folded}" + else: + content = table_folded + + self._add_single(section, content) + + def add_get_started_code( + self, + section: str | None = None, + description: str | None = None, + file_name: str | None = None, + model_format: Literal["pickle", "skops"] | None = None, + ) -> Card: + """Add getting started code + + This code can be copied by users to load the model and make predictions + with it. + + Parameters + ---------- + section : str or None, default=None + The section that the code should be added to. If you're using the + default skops template, you can leave this parameter as ``None``, + otherwise you have to indicate the section. If the section does not + exist, it will be created for you. + + description : str or None, default=None + An optional description to be added before the code. If you're using + the default skops template, a standard text is used. Pass a string + here if you want to use your own text instead. Leave this empty to + not add any description. + + file_name : str or None, default=None + The file name of the model. If no file name is indicated, there will + be an attempt to read the file name from the card's metadata. If + that fails, no code is added and you have to pass this argument + explicitly. + + model_format : "skops", "pickle", or None, default=None + The model format used to store the model. If no format is indicated, + there will be an attempt to read the model format from the card's + metadata. If that fails, no code is added and you have to pass + this argument explicitly. Returns ------- self : object Card object.
+ """ - for section, value in kwargs.items(): - self._template_sections[section] = value + if file_name is None: + file_name = self.metadata.to_dict().get("model_file") + + if model_format is None: + model_format = ( + self.metadata.to_dict().get("sklearn", {}).get("model_format") + ) + + if model_format and (model_format not in ("pickle", "skops")): + msg = ( + f"Invalid model format '{model_format}', should be one of " + "'pickle' or 'skops'" + ) + raise ValueError(msg) + + if (not file_name) or (not model_format): + return self + + if section is None: + if self.template == Templates.skops.value: + section = "How to Get Started with the Model" + else: + msg = NEED_SECTION_ERR_MSG.format(action="add get started code") + raise ValueError(msg) + + if description is None: + if self.template == Templates.skops.value: + description = "Use the code below to get started with the model." + + self._add_get_started_code( + section, + file_name=file_name, + model_format=model_format, + description=description, + ) + return self - def add_plot(self, folded=False, **kwargs: str) -> "Card": + def _add_get_started_code( + self, + section: str, + file_name: str, + model_format: Literal["pickle", "skops"], + description: str | None, + indent: str = " ", + ) -> None: + """Add getting started code to the corresponding section""" + lines = _getting_started_code( + file_name, model_format=model_format, indent=indent + ) + lines = ["```python"] + lines + ["```"] + code = "\n".join(lines) + + if description: + content = f"{description}\n\n{code}" + else: + content = code + + self._add_single(section, content) + + def add_plot(self, *, folded=False, **kwargs: str) -> Card: """Add plots to the model card. + The plot should be saved on the file system and the path passed as + value. + Parameters ---------- folded: bool (default=False) @@ -355,22 +961,27 @@ def add_plot(self, folded=False, **kwargs: str) -> "Card": large. 
**kwargs : dict - The arguments should be of the form `name=plot_path`, where `name` - is the name of the plot and `plot_path` is the path to the plot, - relative to the root of the project. The plots should have already - been saved under the project's folder. + The arguments should be of the form ``name=plot_path``, where + ``name`` is the name of the plot and section, and ``plot_path`` is + the path to the plot on the file system, relative to the root of the + project. The plots should have already been saved under the + project's folder. Returns ------- self : object Card object. + """ - for plot_name, plot_path in kwargs.items(): + for section_name, plot_path in kwargs.items(): + plot_name = split_subsection_names(section_name)[-1] section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) - self._extra_sections.append((plot_name, section)) + self._add_single(section_name, section) return self - def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card: + def add_table( + self, *, folded: bool = False, **kwargs: dict["str", list[Any]] + ) -> Card: """Add a table to the model card. Add a table to the model card. This can be especially useful when you @@ -417,212 +1028,213 @@ def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> C """ for key, val in kwargs.items(): section = TableSection(table=val, folded=folded) - self._extra_sections.append((key, section)) + self._add_single(key, section) return self - def add_metrics(self, **kwargs: str) -> "Card": + def add_metrics( + self, + section: str | None = None, + description: str | None = None, + **kwargs: str | int | float, + ) -> Card: """Add metric values to the model card. + All metrics will be collected in, and then formatted to, a table. + Parameters ---------- + section : str or None, default=None + The section that the metrics should be added to. 
If you're using the + default skops template, you can leave this parameter as ``None``, + otherwise you have to indicate the section. If the section does not + exist, it will be created for you. + + description : str or None, default=None + An optional description to be added before the metrics. If you're + using the default skops template, a standard text is used. Pass a + string here if you want to use your own text instead. Leave this + empty to not add any description. + **kwargs : dict - A dictionary of the form `{metric name: metric value}`. + A dictionary of the form ``{metric name: metric value}``. Returns ------- self : object Card object. """ - for metric, value in kwargs.items(): - self._eval_results[metric] = value - return self + if section is None: + if self.template == Templates.skops.value: + section = "Model description/Evaluation Results" + else: + msg = NEED_SECTION_ERR_MSG.format(action="add metrics") + raise ValueError(msg) + + if description is None: + if self.template == Templates.skops.value: + description = ( + "You can find the details about evaluation process and " + "the evaluation results." + ) - def _generate_card(self) -> ModelCard: - """Generate the ModelCard object + self._metrics.update(kwargs) + self._add_metrics(section, self._metrics, description=description) + return self - Returns - ------- - card : huggingface_hub.ModelCard - The final :class:`huggingface_hub.ModelCard` object with all - placeholders filled and all extra sections inserted. 
- """ - root = skops.__path__ - - # add evaluation results - - template_sections = copy.deepcopy(self._template_sections) - - if self.metadata: - model_file = self.metadata.to_dict().get("model_file") - if model_file and model_file.endswith(".skops"): - template_sections["get_started_code"] = ( - "from skops.io import load\nimport json\n" - "import pandas as pd\n" - f'clf = load("{model_file}")\n' - 'with open("config.json") as f:\n ' - " config =" - " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' - ) - elif model_file is not None: - template_sections["get_started_code"] = ( - "import joblib\nimport json\nimport pandas as pd\nclf =" - f' joblib.load({model_file})\nwith open("config.json") as' - " f:\n " - " config =" - " json.load(f)\n" - 'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))' - ) - if self.model_diagram is True: - model_plot_div = re.sub( - r"\n\s+", "", str(estimator_html_repr(self.get_model())) - ) - if model_plot_div.count("sk-top-container") == 1: - model_plot_div = model_plot_div.replace( - "sk-top-container", 'sk-top-container" style="overflow: auto;' - ) - model_plot: str | None = model_plot_div + def _add_metrics( + self, + section: str, + metrics: dict[str, str | float | int], + description: str | None, + ) -> None: + """Add metrics to the Evaluation Results section.""" + if self._metrics: + data_transposed = zip(*self._metrics.items()) # make column oriented + inp = {key: val for key, val in zip(["Metric", "Value"], data_transposed)} + table = TableSection(inp).format() else: - model_plot = None - template_sections["eval_results"] = tabulate( - list(self._eval_results.items()), - headers=["Metric", "Value"], - tablefmt="github", - ) + # create empty table + table = TableSection({"Metric": [], "Value": []}).format() - # if template path is not given, use default - if template_sections.get("template_path") is None: - template_sections["template_path"] = str( - Path(root[0]) / 
"card" / "default_template.md" - ) + if description: + content = f"{description}\n\n{table}" + else: + content = table - # copying the template so that the original template is not touched/changed - # append plot_name if any plots are provided, at the end of the template - with tempfile.TemporaryDirectory() as tmpdirname: - shutil.copyfile( - template_sections["template_path"], - f"{tmpdirname}/temporary_template.md", - ) - # create a temporary template with the additional plots - template_sections["template_path"] = f"{tmpdirname}/temporary_template.md" - # add extra sections at the end of the template - with open(template_sections["template_path"], "a") as template: - if self._extra_sections: - template.write("\n\n# Additional Content\n") - - for key, val in self._extra_sections: - formatted = val.format() - template.write(f"\n## {key}\n\n{formatted}\n") - - card = ModelCard.from_template( - card_data=self.metadata, - hyperparameter_table=self._extract_estimator_config(), - model_plot=model_plot, - **template_sections, - ) - return card + self._add_single(section, content) - def save(self, path: str | Path) -> None: - """Save the model card. + def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]: + """Yield metadata in yaml format""" + for key, val in metadata.to_dict().items() if metadata else {}: + yield aRepr.repr(f"metadata.{key}={val},").strip('"').strip("'") - This method renders the model card in markdown format and then saves it - as the specified file. + def _generate_content( + self, data: dict[str, Section], depth: int = 1 + ) -> Iterator[str]: + """Yield title and (formatted) contents. - Parameters - ---------- - path: str, or Path - Filepath to save your card. + Recursively go through the data and consecutively yield the title with + the appropriate number of "#"s (markdown format), then the associated + content. - Notes - ----- - The keys in model card metadata can be seen `here - `__. 
""" - card = self._generate_card() - card.save(path) + for val in data.values(): + title = f"{depth * '#'} {val.title}" + yield title - def render(self) -> str: - """Render the final model card as a string. + if isinstance(val.content, str): + yield val.content + else: # is a Formattable + yield val.content.format() - Returns - ------- - card : str - The rendered model card with all placeholders filled and all extra - sections inserted. - """ - card = self._generate_card() - return str(card) + if val.subsections: + yield from self._generate_content(val.subsections, depth=depth + 1) - def _extract_estimator_config(self) -> str: - """Extracts estimator hyperparameters and renders them into a vertical table. + def _iterate_content( + self, data: dict[str, Section], parent_section: str = "" + ) -> Iterator[tuple[str, Formattable | str]]: + """Yield tuples of title and (non-formatted) content.""" + for val in data.values(): + if parent_section: + title = "/".join((parent_section, val.title)) + else: + title = val.title - Returns - ------- - str: - Markdown table of hyperparameters. 
- """ - hyperparameter_dict = self.get_model().get_params(deep=True) - return _clean_table( - tabulate( - list(hyperparameter_dict.items()), - headers=["Hyperparameter", "Value"], - tablefmt="github", - ) - ) + yield title, val.content + + if val.subsections: + yield from self._iterate_content(val.subsections, parent_section=title) @staticmethod - def _strip_blank(text) -> str: - # remove new lines and multiple spaces + def _format_repr(text: str) -> str: + # Remove new lines, multiple spaces, quotation marks, and cap line length text = text.replace("\n", " ") text = re.sub(r"\s+", r" ", text) - return text + return aRepr.repr(text).strip('"').strip("'") def __str__(self) -> str: return self.__repr__() def __repr__(self) -> str: - # create repr for model + # repr for the model model = getattr(self, "model", None) if model: - model_str = self._strip_blank(repr(self.get_model())) - model_repr = aRepr.repr(f" model={model_str},").strip('"').strip("'") + model_repr = self._format_repr(f"model={repr(self.get_model())},") else: model_repr = None - # metadata + # repr for metadata metadata_reprs = [] for key, val in self.metadata.to_dict().items() if self.metadata else {}: if key == "widget": - metadata_reprs.append(" metadata.widget={...},") + metadata_reprs.append("metadata.widget={...},") continue - metadata_reprs.append( - aRepr.repr(f" metadata.{key}={val},").strip('"').strip("'") - ) + metadata_reprs.append(self._format_repr(f"metadata.{key}={val},")) metadata_repr = "\n".join(metadata_reprs) - # normal sections - template_reprs = [] - for key, val in self._template_sections.items(): - val = self._strip_blank(repr(val)) - template_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'")) - template_repr = "\n".join(template_reprs) - - # figures - figure_reprs = [] - for key, val in self._extra_sections: - val = self._strip_blank(repr(val)) - figure_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'")) - figure_repr = "\n".join(figure_reprs) + # 
repr for contents + content_reprs = [] + for title, content in self._iterate_content(self._data): + if not content: + continue + if isinstance(content, str) and content.rstrip("`").rstrip().endswith( + CONTENT_PLACEHOLDER + ): + # if content is just some default text, no need to show it + continue + content_reprs.append(self._format_repr(f"{title}={content},")) + content_repr = "\n".join(content_reprs) + # combine all parts complete_repr = "Card(\n" if model_repr: - complete_repr += model_repr + "\n" + complete_repr += textwrap.indent(model_repr, " ") + "\n" if metadata_reprs: - complete_repr += metadata_repr + "\n" - if template_repr: - complete_repr += template_repr + "\n" - if figure_repr: - complete_repr += figure_repr + "\n" + complete_repr += textwrap.indent(metadata_repr, " ") + "\n" + if content_reprs: + complete_repr += textwrap.indent(content_repr, " ") + "\n" complete_repr += ")" return complete_repr + + def _generate_card(self) -> Iterator[str]: + """Yield sections of the model card, including the metadata.""" + if self.metadata.to_dict(): + yield f"---\n{self.metadata.to_yaml()}\n---" + + for line in self._generate_content(self._data): + if line: + yield "\n" + line + + # add an empty line at the end + yield "" + + def save(self, path: str | Path) -> None: + """Save the model card. + + This method renders the model card in markdown format and then saves it + as the specified file. + + Parameters + ---------- + path: str, or Path + Filepath to save your card. + + Notes + ----- + The keys in model card metadata can be seen `here + `__. + """ + with open(path, "w", encoding="utf-8") as f: + f.write("\n".join(self._generate_card())) + + def render(self) -> str: + """Render the final model card as a string. + + Returns + ------- + result : str + The rendered model card with all placeholders filled and all extra + sections inserted.
+ """ + return "\n".join(self._generate_card()) diff --git a/skops/card/_templates.py b/skops/card/_templates.py new file mode 100644 index 00000000..d30a39a7 --- /dev/null +++ b/skops/card/_templates.py @@ -0,0 +1,167 @@ +"""Templates for model cards + +To add a new template, define it as a dictionary where the key is the section +and the value is the content of the section. If the content is empty but should +be filled by the user, set it to be the ``CONTENT_PLACEHOLDER``. + +After defining the template itself, add it as another enum value in the +``Templates`` enum. + +Finally, if there is a corresponding section in the new template, some methods +on the ``Card`` class should be adjusted to make use of the template. First of +all, ``_fill_default_sections`` should be used to populate the model card with +the template. + +Furthermore, some specific methods might require adjusting. For example, the +``Card._add_hyperparams`` method will add a table of model hyperparameters, but +it needs to know in what section to put them. So if the template contains a +corresponding section, modify the method to put the hyperparameters inside that +section. 
+ +""" + +from enum import Enum + + +class Templates(Enum): + skops = "skops" + + +CONTENT_PLACEHOLDER = "[More Information Needed]" +"""When there is a section but content has yet to be added by the user, show +this""" + +# fmt: off +SKOPS_TEMPLATE = { + "Model description": CONTENT_PLACEHOLDER, + "Model description/Intended uses & limitations": CONTENT_PLACEHOLDER, + "Model description/Training Procedure": "", + "Model description/Training Procedure/Hyperparameters": CONTENT_PLACEHOLDER, + "Model description/Training Procedure/Model Plot": "The model plot is below.", + "Model description/Evaluation Results": CONTENT_PLACEHOLDER, + "How to Get Started with the Model": CONTENT_PLACEHOLDER, + "Model Card Authors": ( + f"This model card is written by following authors:\n\n{CONTENT_PLACEHOLDER}" + ), + "Model Card Contact": ( + "You can contact the model card authors through following channels:\n" + f"{CONTENT_PLACEHOLDER}" + ), + "Citation": ( + "Below you can find information related to citation.\n\n**BibTeX:**\n```\n" + f"{CONTENT_PLACEHOLDER}\n```" + ), +} + +# The template below corresponds to the HF Hub default template, but is geared +# towards deep learning models, especially language models, and thus is not a +# good fit for most sklearn models. +_HUB_TEMPLATE = { + "Model Card": "", + # Provide a quick summary of what the model is/does. + "Model Details": "", + "Model Details/Model Description": "", + # Provide a longer summary of what this model is. 
+ "Model Details/Model Description/Developed by": CONTENT_PLACEHOLDER, + "Model Details/Model Description/Shared by [optional]": CONTENT_PLACEHOLDER, + "Model Details/Model Description/Model type": CONTENT_PLACEHOLDER, + "Model Details/Model Description/Language(s) (NLP)": CONTENT_PLACEHOLDER, + "Model Details/Model Description/License": CONTENT_PLACEHOLDER, + "Model Details/Model Description/Finetuned from model [optional]": CONTENT_PLACEHOLDER, + "Model Details/Model Description/Resources for more information": CONTENT_PLACEHOLDER, + + "Uses": "", + # Address questions around how the model is intended to be used, including + # the foreseeable users of the model and those affected by the model. + "Uses/Direct Use": CONTENT_PLACEHOLDER, + # This section is for the model use without fine-tuning or plugging into a + # larger ecosystem/app. + "Uses/Downstream Use [optional]": CONTENT_PLACEHOLDER, + # This section is for the model use when fine-tuned for a task, or when + # plugged into a larger ecosystem/app. + "Uses/Out-of-Scope Use": CONTENT_PLACEHOLDER, + # This section addresses misuse, malicious use, and uses that the model will + # not work well for. + + "Bias, Risks, and Limitations": CONTENT_PLACEHOLDER, + # This section is meant to convey both technical and sociotechnical + # limitations. + "Bias, Risks, and Limitations/Recommendations": ( + "Users (both direct and downstream) should be made aware of the risks, biases " + "and limitations of the model. More information needed for further " + "recommendations." + ), + # This section is meant to convey recommendations with respect to the bias, + # risk, and technical limitations. + + "Training Details": "", + "Training Details/Training Data": CONTENT_PLACEHOLDER, + # This should link to a Data Card, perhaps with a short stub of information + # on what the training data is all about as well as documentation related to + # data pre-processing or additional filtering. 
+ "Training Details/Training Procedure [optional]": "", + # This relates heavily to the Technical Specifications. Content here should + # link to that section when it is relevant to the training procedure. + "Training Details/Training Procedure [optional]/Preprocessing": CONTENT_PLACEHOLDER, + "Training Details/Training Procedure [optional]/Speeds, Sizes, Times": CONTENT_PLACEHOLDER, + # This section provides information about throughput, start/end time, + # checkpoint size if relevant, etc. + + "Evaluation": "", + # This section describes the evaluation protocols and provides the results. + "Evaluation/Testing Data, Factors & Metrics": "", + "Evaluation/Testing Data, Factors & Metrics/Testing Data": CONTENT_PLACEHOLDER, + # This should link to a Data Card if possible + "Evaluation/Testing Data, Factors & Metrics/Factors": CONTENT_PLACEHOLDER, + # These are the things the evaluation is disaggregating by, e.g., + # subpopulations or domains. + "Evaluation/Testing Data, Factors & Metrics/Metrics": CONTENT_PLACEHOLDER, + # These are the evaluation metrics being used, ideally with a description of + # why. + "Evaluation/Results": CONTENT_PLACEHOLDER, + + "Model Examination [optional]": CONTENT_PLACEHOLDER, + # Relevant interpretability work for the model goes here. + + "Environmental Impact": ( + "Carbon emissions can be estimated using the " + "[Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) " + "presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700)." + ), + # Total emissions (in grams of CO2eq) and additional considerations, such as + # electricity usage, go here. 
Edit the suggested text below accordingly. + "Environmental Impact/Hardware Type": CONTENT_PLACEHOLDER, + "Environmental Impact/Hours used": CONTENT_PLACEHOLDER, + "Environmental Impact/Cloud Provider": CONTENT_PLACEHOLDER, + "Environmental Impact/Compute Region": CONTENT_PLACEHOLDER, + "Environmental Impact/Carbon Emitted": CONTENT_PLACEHOLDER, + + "Technical Specifications [optional]": "", + "Technical Specifications [optional]/Model Architecture and Objective": CONTENT_PLACEHOLDER, + "Technical Specifications [optional]/Compute Infrastructure": CONTENT_PLACEHOLDER, + "Technical Specifications [optional]/Compute Infrastructure/Hardware": CONTENT_PLACEHOLDER, + "Technical Specifications [optional]/Compute Infrastructure/Software": CONTENT_PLACEHOLDER, + + "Citation [optional]": "", + # If there is a paper or blog post introducing the model, the APA and Bibtex + # information for that should go in this section. + "Citation [optional]/BibTeX": CONTENT_PLACEHOLDER, + "Citation [optional]/APA": CONTENT_PLACEHOLDER, + + "Glossary [optional]": "", + # If relevant, include terms and calculations in this section that can help + # readers understand the model or model card. + + "More Information [optional]": CONTENT_PLACEHOLDER, + "Model Card Authors [optional]": CONTENT_PLACEHOLDER, + "Model Card Contact": CONTENT_PLACEHOLDER, + "How to Get Started with the Model": f"""Use the code below to get started with the model. + +
+ Click to expand + +{CONTENT_PLACEHOLDER} + +
""", +} +# fmt: on diff --git a/skops/card/default_template.md b/skops/card/default_template.md index edbc8d49..91141dfe 100644 --- a/skops/card/default_template.md +++ b/skops/card/default_template.md @@ -29,7 +29,7 @@ The model plot is below. {{ model_plot }} -## Evaluation Results +## Evaluation Results You can find the details about evaluation process and the evaluation results. diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 411fec11..92e1c68c 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -1,7 +1,8 @@ -import copy import os import pickle +import re import tempfile +import textwrap from pathlib import Path import matplotlib.pyplot as plt @@ -11,11 +12,16 @@ from huggingface_hub import CardData, metadata_load from sklearn.datasets import load_iris from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.neighbors import KNeighborsClassifier -import skops from skops import hub_utils from skops.card import Card, metadata_from_config -from skops.card._model_card import PlotSection, TableSection, _load_model +from skops.card._model_card import ( + SKOPS_TEMPLATE, + PlotSection, + TableSection, + _load_model, +) from skops.io import dump @@ -135,12 +141,82 @@ def test_save_model_card(destination_path, model_card): assert (Path(destination_path) / "README.md").exists() -def test_hyperparameter_table(destination_path, model_card): - model_card = model_card.render() - assert "fit_intercept" in model_card +CUSTOM_TEMPLATES = [None, {}, {"A Title", "Another Title", "A Title/A Section"}] # type: ignore + + +class TestAddModelPlot: + """Tests for the sklearn model repr""" + + def test_default(self, model_card): + result = model_card.select( + "Model description/Training Procedure/Model Plot" + ).content + # don't compare whole text, as it's quite long and non-deterministic + assert result.startswith("The model plot is below.\n\n