diff --git a/skops/card/_markup.py b/skops/card/_markup.py
index 32f94b36..2e42a25c 100644
--- a/skops/card/_markup.py
+++ b/skops/card/_markup.py
@@ -266,7 +266,7 @@ def _table(self, item) -> str:
data_transposed = zip(*body)
table = {key: val for key, val in zip(columns, data_transposed)}
- res = TableSection(table).format()
+ res = TableSection(title="", content="", table=table).format()
return res
def _parse_div(self, item) -> str:
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index 273d78c3..0cb642d2 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -9,7 +9,7 @@
from dataclasses import dataclass, field
from pathlib import Path
from reprlib import Repr
-from typing import Any, Iterator, Literal, Protocol, Sequence, Union
+from typing import Any, Iterator, Literal, Sequence, Union
import joblib
from huggingface_hub import ModelCardData
@@ -58,66 +58,6 @@ def _clean_table(table: str) -> str:
return table
-@dataclass
-class PlotSection:
- """Adds a link to a figure to the model card"""
-
- alt_text: str
- path: str | Path
- folded: bool = False
-
- def format(self) -> str:
- text = f"![{self.alt_text}]({self.path})"
- return wrap_as_details(text, folded=self.folded)
-
- def __repr__(self) -> str:
- return repr(self.path)
-
-
-@dataclass
-class TableSection:
- """Adds a table to the model card"""
-
- table: Mapping[str, Sequence[Any]]
- folded: bool = False
-
- def __post_init__(self) -> None:
- try:
- import pandas as pd
-
- self._is_pandas_df = isinstance(self.table, pd.DataFrame)
- except ImportError:
- self._is_pandas_df = False
-
- if self._is_pandas_df:
- ncols = len(self.table.columns) # type: ignore
- else:
- ncols = len(self.table)
- if ncols == 0:
- raise ValueError("Trying to add table with no columns")
-
- def format(self) -> str:
- if self._is_pandas_df:
- headers = self.table.columns # type: ignore
- else:
- headers = self.table.keys()
-
- table = _clean_table(
- tabulate(self.table, tablefmt="github", headers=headers, showindex=False)
- )
- return wrap_as_details(table, folded=self.folded)
-
- def __repr__(self) -> str:
- if self._is_pandas_df:
- nrows, ncols = self.table.shape # type: ignore
- else:
- # table cannot be empty, so no checks needed here
- ncols = len(self.table)
- key = next(iter(self.table.keys()))
- nrows = len(self.table[key])
- return f"Table({nrows}x{ncols})"
-
-
def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData:
"""Construct a ``ModelCardData`` object from a ``config.json`` file.
@@ -244,7 +184,7 @@ class Section:
"""Building block of the model card.
The model card is represented internally as a dict with keys being strings
- and values being Sections. The key is identical to the section title.
+ and values being ``Section``s. The key is identical to the section title.
Additionally, the section may hold content in the form of strings (can be an
empty string) or a ``Formattable``, which is simply an object with a
@@ -261,7 +201,7 @@ class Section:
"""
title: str
- content: Formattable | str
+ content: str
subsections: dict[str, Section] = field(default_factory=dict)
visible: bool = True
@@ -279,13 +219,14 @@ def select(self, key: str) -> Section:
-------
section : Section
A dataclass containing all information relevant to the selected
- section. Those are the title, the content, and subsections (in a
- dict).
+ section. Those are the title, the content, subsections (in a dict),
+ and additional fields that depend on the type of section.
Raises
------
KeyError
If the given section name was not found, a ``KeyError`` is raised.
+
"""
section_names = split_subsection_names(key)
# check that no section name is empty
@@ -293,15 +234,97 @@ def select(self, key: str) -> Section:
msg = f"Section name cannot be empty but got '{key}'"
raise KeyError(msg)
- section = self
+ section: Section = self
for section_name in section_names:
section = section.subsections[section_name]
return section
+ def format(self) -> str:
+ return self.content
+
+ def __repr__(self) -> str:
+ """Generates the ``repr`` of this section.
+
+ ``repr`` determines how the content of this section is shown in the
+ Card's repr.
+ """
+ return self.content
+
+
+@dataclass
+class PlotSection(Section):
+ """Adds a link to a figure to the model card"""
+
+ path: str | Path = ""
+ alt_text: str = ""
+ folded: bool = False
+
+ def __post_init__(self) -> None:
+ if not self.path:
+ raise TypeError(f"{self.__class__.__name__} requires a path")
-class Formattable(Protocol):
def format(self) -> str:
- ... # pragma: no cover
+ # if no alt text provided, fall back to figure path
+ alt_text = self.alt_text or self.path
+ text = f"![{alt_text}]({self.path})"
+ val = wrap_as_details(text, folded=self.folded)
+ if self.content:
+ val = f"{self.content}\n\n{val}"
+ return val
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}({self.path})"
+
+
+@dataclass
+class TableSection(Section):
+ """Adds a table to the model card"""
+
+ table: Mapping[str, Sequence[Any]] = field(default_factory=dict)
+ folded: bool = False
+
+ def __post_init__(self) -> None:
+ self._check_table()
+
+ def _check_table(self) -> None:
+ try:
+ import pandas as pd
+
+ self._is_pandas_df = isinstance(self.table, pd.DataFrame)
+ except ImportError:
+ self._is_pandas_df = False
+
+ if self._is_pandas_df:
+ ncols = len(self.table.columns) # type: ignore
+ else:
+ ncols = len(self.table)
+ if ncols == 0:
+ raise ValueError("Trying to add table with no columns")
+
+ def format(self) -> str:
+ if self._is_pandas_df:
+ headers = self.table.columns # type: ignore
+ else:
+ headers = self.table.keys()
+
+ table = _clean_table(
+ tabulate(self.table, tablefmt="github", headers=headers, showindex=False)
+ )
+ val = wrap_as_details(table, folded=self.folded)
+
+ if self.content:
+ val = f"{self.content}\n\n{val}"
+ return val
+
+ def __repr__(self) -> str:
+ if self._is_pandas_df:
+ nrows, ncols = self.table.shape # type: ignore
+ else:
+ # table cannot be empty, so no checks needed here
+ ncols = len(self.table)
+ key = next(iter(self.table.keys()))
+ nrows = len(self.table[key])
+ return f"{self.__class__.__name__}({nrows}x{ncols})"
def _load_model(model: Any, trusted=False) -> Any:
@@ -443,10 +466,10 @@ class Card:
model=LogisticRegression(random_state=0, solver='liblinear'),
metadata.license=mit,
Model description=This is the best model,
- Model description/Training Procedure/... | | warm_start | False | ,
+ Model description/Training Procedure/Hyperparameters=TableSection(15x2),
Model description/Training Procedure/...,
- Model description/Evaluation Results=...ccuracy | 0.96 | | f1 score | 0.96 |,
- Model description/Confusion Matrix=...confusion_matrix.png'),
+ Model description/Evaluation Results=TableSection(2x2),
+ Model description/Confusion Matrix=Pl...confusion_matrix.png),
Model description/Model name=This model is called Bob,
A new section=Please rate my model,
)
@@ -513,7 +536,7 @@ def get_model(self) -> Any:
# model has changed, but at the moment we have no way of knowing that
return model
- def add(self, **kwargs: str | Formattable) -> Self:
+ def add(self, **kwargs: str) -> Self:
"""Add new section(s) to the model card.
Add one or multiple sections to the model card. The section names are
@@ -685,7 +708,7 @@ def delete(self, key: str | Sequence[str]) -> None:
parent_section = self._select(subsection_names, create=False)
del parent_section[leaf_node_name]
- def _add_single(self, key: str, val: Formattable | str) -> Section:
+ def _add_single(self, key: str, val: str | Section) -> Section:
"""Add a single section.
If the (sub)section does not exist, it is created. Otherwise, the
@@ -696,8 +719,10 @@ def _add_single(self, key: str, val: Formattable | str) -> Section:
key: str
The name of the (sub)section.
- val: str or Formattable
- The value to assign to the (sub)section.
+ val: str or Section
+ The value to assign to the (sub)section. If this is already a
+ section, leave it as it is. If it's a string, create a
+ :class:`skops.card._model_card.Section`.
Returns
-------
@@ -708,13 +733,27 @@ def _add_single(self, key: str, val: Formattable | str) -> Section:
*subsection_names, leaf_node_name = split_subsection_names(key)
section = self._select(subsection_names)
- if leaf_node_name in section:
- # entry exists, only overwrite content
- section[leaf_node_name].content = val
+ if isinstance(val, str):
+ # val is a str, create a Section
+ new_section = Section(title=leaf_node_name, content=val)
else:
- # entry does not exist, create a new one
- section[leaf_node_name] = Section(title=leaf_node_name, content=val)
+ # val is already a section and can be used as is
+ new_section = val
+ if leaf_node_name in section:
+ # entry exists, preserve its subsections
+ old_section = section[leaf_node_name]
+ if new_section.subsections and (
+ new_section.subsections != old_section.subsections
+ ):
+ msg = (
+ f"Trying to override section '{leaf_node_name}' but found "
+ "conflicting subsections."
+ )
+ raise ValueError(msg)
+ new_section.subsections = old_section.subsections
+
+ section[leaf_node_name] = new_section
return section[leaf_node_name]
def add_model_plot(
@@ -764,12 +803,14 @@ def add_model_plot(
if self.template == Templates.skops.value:
description = "The model plot is below."
- self._add_model_plot(self.get_model(), section=section, description=description)
+ self._add_model_plot(
+ self.get_model(), section_name=section, description=description
+ )
return self
def _add_model_plot(
- self, model: Any, section: str, description: str | None
+ self, model: Any, section_name: str, description: str | None
) -> None:
"""Add model plot section
@@ -787,7 +828,10 @@ def _add_model_plot(
else:
content = model_plot_div
- self._add_single(section, content)
+ description = description or ""
+ title = split_subsection_names(section_name)[-1]
+ section = Section(title=title, content=content)
+ self._add_single(section_name, section)
def add_hyperparams(
self, section: str | None = None, description: str | None = None
@@ -826,42 +870,27 @@ def add_hyperparams(
description = "The model is trained with below hyperparameters."
self._add_hyperparams(
- self.get_model(), section=section, description=description
+ self.get_model(), section_name=section, description=description
)
return self
def _add_hyperparams(
- self, model: Any, section: str, description: str | None
+ self, model: Any, section_name: str, description: str | None
) -> None:
"""Add hyperparameter section.
The model should be a loaded sklearn model, not a path.
"""
- hyperparameter_dict = model.get_params(deep=True)
- table = _clean_table(
- tabulate(
- list(hyperparameter_dict.items()),
- headers=["Hyperparameter", "Value"],
- tablefmt="github",
- )
- )
- table_folded = textwrap.dedent(
- """
-
- Click to expand
-
- {}
+ params = model.get_params(deep=True)
+ table = {"Hyperparameter": list(params.keys()), "Value": list(params.values())}
- """
- ).format(table)
-
- if description:
- content = f"{description}\n{table_folded}"
- else:
- content = table_folded
-
- self._add_single(section, content)
+ description = description or ""
+ title = split_subsection_names(section_name)[-1]
+ section = TableSection(
+ title=title, content=description, table=table, folded=True
+ )
+ self._add_single(section_name, section)
def add_get_started_code(
self,
@@ -947,7 +976,7 @@ def add_get_started_code(
def _add_get_started_code(
self,
- section: str,
+ section_name: str,
file_name: str,
model_format: Literal["pickle", "skops"],
description: str | None,
@@ -965,9 +994,18 @@ def _add_get_started_code(
else:
content = code
- self._add_single(section, content)
+ title = split_subsection_names(section_name)[-1]
+ section = Section(title=title, content=content)
+ self._add_single(section_name, section)
- def add_plot(self, *, folded=False, **kwargs: str) -> Self:
+ def add_plot(
+ self,
+ *,
+ description: str | None = None,
+ alt_text: str | None = None,
+ folded=False,
+ **kwargs: str,
+ ) -> Self:
"""Add plots to the model card.
The plot should be saved on the file system and the path passed as
@@ -975,6 +1013,20 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self:
Parameters
----------
+ description: str or None (default=None)
+ If a string is passed as description, it is shown before the figure.
+ If multiple figures are added with one call, they all get the same
+ description. To add multiple figures with different descriptions,
+ call this method multiple times.
+
+ alt_text: str or None (default=None)
+ If a string is passed as ``alt_text``, it is used as the alternative
+ text for the figure (i.e. what is shown if the figure cannot be
+ rendered). If this argument is ``None``, the alt_text will just be
+ the same as the section title. If multiple figures are added with
+ one call, they all get the same alt text. To add multiple figures
+ with different alt texts, call this method multiple times.
+
folded: bool (default=False)
If set to ``True``, the plot will be enclosed in a ``details`` tag.
That means the content is folded by default and users have to click
@@ -994,14 +1046,26 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self:
Card object.
"""
+ description = description or ""
for section_name, plot_path in kwargs.items():
- plot_name = split_subsection_names(section_name)[-1]
- section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded)
+ title = split_subsection_names(section_name)[-1]
+ plot_alt_text = alt_text or title  # resolved per plot
+ section = PlotSection(
+ title=title,
+ content=description,
+ alt_text=plot_alt_text,
+ path=plot_path,
+ folded=folded,
+ )
self._add_single(section_name, section)
return self
def add_table(
- self, *, folded: bool = False, **kwargs: dict["str", list[Any]]
+ self,
+ *,
+ description: str | None = None,
+ folded: bool = False,
+ **kwargs: dict["str", list[Any]],
) -> Self:
"""Add a table to the model card.
@@ -1027,6 +1091,12 @@ def add_table(
Parameters
----------
+ description: str or None (default=None)
+ If a string is passed as description, it is shown before the table.
+ If multiple tables are added with one call, they all get the same
+ description. To add multiple tables with different descriptions,
+ call this method multiple times.
+
folded: bool (default=False)
If set to ``True``, the table will be enclosed in a ``details`` tag.
That means the content is folded by default and users have to click
@@ -1047,8 +1117,11 @@ def add_table(
Card object.
"""
+ description = description or ""
for key, val in kwargs.items():
- section = TableSection(table=val, folded=folded)
+ # nested keys ("a/b") must only contribute their last part as the title
+ title = split_subsection_names(key)[-1]
+ section = TableSection(title, description, table=val, folded=folded)
self._add_single(key, section)
return self
@@ -1098,7 +1171,7 @@ def add_metrics(
"the evaluation results."
)
self._metrics.update(kwargs)
- self._add_metrics(section, self._metrics, description=description)
+ self._add_metrics(section, description=description, metrics=self._metrics)
return self
def add_permutation_importances(
@@ -1108,6 +1181,7 @@ def add_permutation_importances(
plot_file: str = "permutation_importances.png",
plot_name: str = "Permutation Importances",
overwrite: bool = False,
+ description: str | None = None,
) -> Self:
"""Plots permutation importance and saves it to model card.
@@ -1129,6 +1203,9 @@ def add_permutation_importances(
Whether to overwrite the permutation importance plot file, if a plot by that
name already exists.
+ description : str | None (default=None)
+ An optional description to be added before the plot.
+
Returns
-------
self : object
@@ -1151,31 +1228,31 @@ def add_permutation_importances(
ax.set_title(plot_name)
ax.set_xlabel("Decrease in Score")
plt.savefig(plot_file)
- self.add_plot(**{plot_name: plot_file})
+ self.add_plot(description=description, **{plot_name: plot_file})
return self
def _add_metrics(
self,
- section: str,
- metrics: dict[str, str | float | int],
+ section_name: str,
description: str | None,
+ metrics: dict[str, str | float | int],
) -> None:
"""Add metrics to the Evaluation Results section."""
- if self._metrics:
- data_transposed = zip(*self._metrics.items()) # make column oriented
- inp = {key: val for key, val in zip(["Metric", "Value"], data_transposed)}
- table = TableSection(inp).format()
+ if metrics:
+ # transpose from row oriented to column oriented
+ data_transposed = zip(*metrics.items())
+ table = {
+ key: list(val) for key, val in zip(["Metric", "Value"], data_transposed)
+ }
else:
# create empty table
- table = TableSection({"Metric": [], "Value": []}).format()
+ table = {"Metric": [], "Value": []}
- if description:
- content = f"{description}\n\n{table}"
- else:
- content = table
-
- self._add_single(section, content)
+ description = description or ""
+ title = split_subsection_names(section_name)[-1]
+ section = TableSection(title=title, content=description, table=table)
+ self._add_single(section_name, section)
def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]:
"""Yield metadata in yaml format"""
@@ -1192,24 +1269,21 @@ def _generate_content(
content.
"""
- for val in data.values():
- if not val.visible:
+ for section in data.values():
+ if not section.visible:
continue
- title = f"{depth * '#'} {val.title}"
+ title = f"{depth * '#'} {section.title}"
yield title
- if isinstance(val.content, str):
- yield val.content
- else: # is a Formattable
- yield val.content.format()
+ yield section.format()
- if val.subsections:
- yield from self._generate_content(val.subsections, depth=depth + 1)
+ if section.subsections:
+ yield from self._generate_content(section.subsections, depth=depth + 1)
def _iterate_content(
self, data: dict[str, Section], parent_section: str = ""
- ) -> Iterator[tuple[str, Formattable | str]]:
+ ) -> Iterator[tuple[str, Section]]:
"""Yield tuples of title and (non-formatted) content."""
for val in data.values():
if parent_section:
@@ -1217,7 +1291,7 @@ def _iterate_content(
else:
title = val.title
- yield title, val.content
+ yield title, val
if val.subsections:
yield from self._iterate_content(val.subsections, parent_section=title)
@@ -1252,15 +1326,14 @@ def __repr__(self) -> str:
# repr for contents
content_reprs = []
- for title, content in self._iterate_content(self._data):
+ for title, section in self._iterate_content(self._data):
+ content = section.format()
if not content:
continue
- if isinstance(content, str) and content.rstrip("`").rstrip().endswith(
- CONTENT_PLACEHOLDER
- ):
+ if content.rstrip("`").rstrip().endswith(CONTENT_PLACEHOLDER):
# if content is just some default text, no need to show it
continue
- content_reprs.append(self._format_repr(f"{title}={content},"))
+ content_reprs.append(self._format_repr(f"{title}={section},"))
content_repr = "\n".join(content_reprs)
# combine all parts
diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
index 9ad94277..4b74d724 100644
--- a/skops/card/tests/test_card.py
+++ b/skops/card/tests/test_card.py
@@ -8,7 +8,7 @@
import numpy as np
import pytest
import sklearn
-from huggingface_hub import CardData, metadata_load
+from huggingface_hub import ModelCardData, metadata_load
from sklearn.datasets import load_iris
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
@@ -20,6 +20,7 @@
from skops.card._model_card import (
SKOPS_TEMPLATE,
PlotSection,
+ Section,
TableSection,
_load_model,
)
@@ -263,7 +264,7 @@ def expected(self):
def test_default(self, model_card, expected):
result = model_card.select(
"Model description/Training Procedure/Hyperparameters"
- ).content
+ ).format()
# remove multiple whitespaces and dashes, as they're not important and may
# differ depending on OS
@@ -273,7 +274,7 @@ def test_default(self, model_card, expected):
def test_other_section(self, model_card, expected):
model_card.add_hyperparams(section="Other section")
- result = model_card.select("Other section").content
+ result = model_card.select("Other section").format()
# remove multiple whitespaces and dashes, as they're not important and may
# differ depending on OS
@@ -285,7 +286,7 @@ def test_other_description(self, model_card, expected):
model_card.add_hyperparams(description="Awesome hyperparams")
result = model_card.select(
"Model description/Training Procedure/Hyperparameters"
- ).content
+ ).format()
assert result.startswith("Awesome hyperparams")
@pytest.mark.parametrize("template", CUSTOM_TEMPLATES)
@@ -305,9 +306,9 @@ def test_add_twice(self, model_card):
# of sense
text1 = model_card.select(
"Model description/Training Procedure/Hyperparameters"
- ).content
+ ).format()
model_card.add_hyperparams(section="Other section")
- text2 = model_card.select("Other section").content
+ text2 = model_card.select("Other section").format()
assert text1 == text2
@@ -321,7 +322,7 @@ def get_params(self, deep=False):
model_card = Card(EstimatorWithLbInParams())
section_name = "Model description/Training Procedure/Hyperparameters"
- text_hyperparams = model_card.select(section_name).content
+ text_hyperparams = model_card.select(section_name).format()
# remove multiple whitespaces, as they're not important
text_cleaned = _strip_multiple_chars(text_hyperparams, " ")
@@ -333,13 +334,13 @@ class TestAddMetrics:
def test_default(self, model_card):
# by default, don't add a table, as there are no metrics
- result = model_card.select("Model description/Evaluation Results").content
+ result = model_card.select("Model description/Evaluation Results").format()
expected = "[More Information Needed]"
assert result == expected
def test_empty_metrics_table(self, model_card):
model_card.add_metrics()
- result = model_card.select("Model description/Evaluation Results").content
+ result = model_card.select("Model description/Evaluation Results").format()
expected = (
"You can find the details about evaluation process and the evaluation "
"results.\n\n"
@@ -354,7 +355,7 @@ def test_multiple_metrics(self, model_card):
f1=0.1, # float
awesomeness=123, # int
)
- result = model_card.select("Model description/Evaluation Results").content
+ result = model_card.select("Model description/Evaluation Results").format()
expected = (
"You can find the details about evaluation process and the evaluation "
"results.\n\n"
@@ -368,7 +369,7 @@ def test_multiple_metrics(self, model_card):
def test_other_section(self, model_card):
model_card.add_metrics(accuracy=0.9, section="Other section")
- result = model_card.select("Other section").content
+ result = model_card.select("Other section").format()
expected = (
"You can find the details about evaluation process and the evaluation "
"results.\n\n"
@@ -380,7 +381,7 @@ def test_other_section(self, model_card):
def test_other_description(self, model_card):
model_card.add_metrics(accuracy=0.9, description="Awesome metrics")
- result = model_card.select("Model description/Evaluation Results").content
+ result = model_card.select("Model description/Evaluation Results").format()
assert result.startswith("Awesome metrics")
@pytest.mark.parametrize("template", CUSTOM_TEMPLATES)
@@ -398,100 +399,123 @@ def test_add_twice(self, model_card):
# it's possible to add the section twice, even if it doesn't make a lot
# of sense
model_card.add_metrics(accuracy=0.9)
- text1 = model_card.select("Model description/Evaluation Results").content
+ text1 = model_card.select("Model description/Evaluation Results").format()
model_card.add_metrics(section="Other section")
- text2 = model_card.select("Other section").content
+ text2 = model_card.select("Other section").format()
assert text1 == text2
-def test_permutation_importances(
- iris_estimator, iris_data, model_card, destination_path
-):
- X, y = iris_data
- result = permutation_importance(
- iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2
- )
+class TestAddPermutationImportance:
+ @pytest.fixture
+ def importances(self, iris_estimator, iris_data):
+ X, y = iris_data
+ result = permutation_importance(
+ iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2
+ )
+ return result
- model_card.add_permutation_importances(
- result,
- X.columns,
- Path(destination_path) / "importance.png",
- "Permutation Importance",
- )
- temp_path = Path(destination_path) / "importance.png"
- assert f"![Permutation Importance]({temp_path})" in model_card.render()
+ def test_permutation_importances(
+ self, iris_data, importances, model_card, destination_path
+ ):
+ X, _ = iris_data
+ model_card.add_permutation_importances(
+ importances,
+ columns=X.columns,
+ plot_file=Path(destination_path) / "importance.png",
+ plot_name="Permutation Importance",
+ )
+ temp_path = Path(destination_path) / "importance.png"
+ section = model_card.select("Permutation Importance")
+ expected = f"![Permutation Importance]({temp_path})"
+ assert section.format() == expected
+ def test_multiple_permutation_importances(
+ self, iris_data, iris_estimator, importances, model_card, destination_path
+ ):
+ X, y = iris_data
+ model_card.add_permutation_importances(
+ importances, X.columns, plot_file=Path(destination_path) / "importance.png"
+ )
-def test_multiple_permutation_importances(
- iris_estimator, iris_data, model_card, destination_path
-):
- X, y = iris_data
- result = permutation_importance(
- iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2
- )
- model_card.add_permutation_importances(
- result, X.columns, plot_file=Path(destination_path) / "importance.png"
- )
- f1 = make_scorer(f1_score, average="micro")
- result = permutation_importance(
- iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2
- )
- model_card.add_permutation_importances(
- result,
- X.columns,
- plot_file=Path(destination_path) / "f1_importance.png",
- plot_name="Permutation Importance on f1",
- )
- # check for default one
- temp_path = Path(destination_path) / "importance.png"
- assert f"![Permutation Importances]({temp_path})" in model_card.render()
- # check for F1
- temp_path_f1 = Path(destination_path) / "f1_importance.png"
- assert f"![Permutation Importance on f1]({temp_path_f1})" in model_card.render()
+ f1 = make_scorer(f1_score, average="micro")
+ importances_f1 = permutation_importance(
+ iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2
+ )
+ model_card.add_permutation_importances(
+ importances_f1,
+ columns=X.columns,
+ plot_file=Path(destination_path) / "f1_importance.png",
+ plot_name="Permutation Importance on f1",
+ )
+ # check for default one
+ temp_path = Path(destination_path) / "importance.png"
+ section = model_card.select("Permutation Importances")
+ expected = f"![Permutation Importances]({temp_path})"
+ assert section.format() == expected
-def test_duplicate_permutation_importances(
- iris_estimator, iris_data, model_card, destination_path
-):
- X, y = iris_data
- result = permutation_importance(
- iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2
- )
- plot_path = os.path.join(destination_path, "importance.png")
- model_card.add_permutation_importances(result, X.columns, plot_file=plot_path)
- with pytest.raises(
- ValueError,
- match=(
- "already exists. Set `overwrite` to `True` or pass a"
- " different filename for the plot."
- ),
+ # check for F1
+ temp_path_f1 = Path(destination_path) / "f1_importance.png"
+ section = model_card.select("Permutation Importance on f1")
+ expected = f"![Permutation Importance on f1]({temp_path_f1})"
+ assert section.format() == expected
+
+ def test_duplicate_permutation_importances(
+ self, iris_data, importances, model_card, destination_path
):
+ X, _ = iris_data
+ plot_path = os.path.join(destination_path, "importance.png")
+ model_card.add_permutation_importances(
+ importances, X.columns, plot_file=plot_path
+ )
+ with pytest.raises(
+ ValueError,
+ match=(
+ "already exists. Set `overwrite` to `True` or pass a"
+ " different filename for the plot."
+ ),
+ ):
+ model_card.add_permutation_importances(
+ importances,
+ columns=X.columns,
+ plot_file=plot_path,
+ plot_name="Permutation Importance on f1",
+ )
+
+ def test_duplicate_permutation_importances_overwrite(
+ self, iris_data, importances, model_card, destination_path
+ ):
+ X, _ = iris_data
+ plot_path = os.path.join(destination_path, "importance.png")
+ model_card.add_permutation_importances(
+ importances, X.columns, plot_file=plot_path
+ )
+
model_card.add_permutation_importances(
- result,
- X.columns,
+ importances,
+ columns=X.columns,
plot_file=plot_path,
plot_name="Permutation Importance on f1",
+ overwrite=True,
)
+ section = model_card.select("Permutation Importance on f1")
+ expected = f"![Permutation Importance on f1]({plot_path})"
+ assert section.format() == expected
-
-def test_duplicate_permutation_importances_overwrite(
- iris_estimator, iris_data, model_card, destination_path
-):
- X, y = iris_data
- result = permutation_importance(
- iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2
- )
- plot_path = os.path.join(destination_path, "importance.png")
- model_card.add_permutation_importances(result, X.columns, plot_file=plot_path)
-
- model_card.add_permutation_importances(
- result,
- X.columns,
- plot_file=plot_path,
- plot_name="Permutation Importance on f1",
- overwrite=True,
- )
- assert f"![Permutation Importance on f1]({plot_path})" in model_card.render()
+ def test_permutation_importances_with_description(
+ self, iris_data, importances, model_card, destination_path
+ ):
+ X, _ = iris_data
+ model_card.add_permutation_importances(
+ importances,
+ columns=X.columns,
+ plot_file=Path(destination_path) / "importance.png",
+ description="Very important",
+ )
+ temp_path = Path(destination_path) / "importance.png"
+ section = model_card.select("Permutation Importances")
+ expected = f"Very important\n\n![Permutation Importances]({temp_path})"
+ assert section.format() == expected
class TestAddGetStartedCode:
@@ -539,7 +563,7 @@ def model_card_skops(self, metadata_skops):
def test_default_pickle(self, model_card):
# by default, don't add a table, as there are no metrics
- result = model_card.select("How to Get Started with the Model").content
+ result = model_card.select("How to Get Started with the Model").format()
expected = (
"Use the code below to get started with the model.\n\n"
"```python\n"
@@ -556,7 +580,7 @@ def test_default_pickle(self, model_card):
def test_default_skops(self, model_card_skops):
# by default, don't add a table, as there are no metrics
- result = model_card_skops.select("How to Get Started with the Model").content
+ result = model_card_skops.select("How to Get Started with the Model").format()
expected = (
"Use the code below to get started with the model.\n\n"
"```python\n"
@@ -590,18 +614,18 @@ def to_dict(self):
def test_other_section(self, model_card):
model_card.add_get_started_code(section="Other section")
- result = model_card.select("Other section").content
+ result = model_card.select("Other section").format()
expected = "Use the code below to get started with the model."
assert result.startswith(expected)
def test_other_description(self, model_card):
model_card.add_get_started_code(description="Awesome code")
- result = model_card.select("How to Get Started with the Model").content
+ result = model_card.select("How to Get Started with the Model").format()
assert result.startswith("Awesome code")
def test_other_filename(self, model_card):
model_card.add_get_started_code(file_name="foobar.pkl")
- result = model_card.select("How to Get Started with the Model").content
+ result = model_card.select("How to Get Started with the Model").format()
expected = (
"Use the code below to get started with the model.\n\n"
"```python\n"
@@ -618,7 +642,7 @@ def test_other_filename(self, model_card):
def test_other_model_format(self, model_card):
model_card.add_get_started_code(model_format="skops")
- result = model_card.select("How to Get Started with the Model").content
+ result = model_card.select("How to Get Started with the Model").format()
expected = (
"Use the code below to get started with the model.\n\n"
"```python\n"
@@ -670,9 +694,9 @@ def test_custom_template_no_section_raises(self, template):
def test_add_twice(self, model_card):
# it's possible to add the section twice, even if it doesn't make a lot
# of sense
- text1 = model_card.select("How to Get Started with the Model").content
+ text1 = model_card.select("How to Get Started with the Model").format()
model_card.add_get_started_code(section="Other section")
- text2 = model_card.select("Other section").content
+ text2 = model_card.select("Other section").format()
assert text1 == text2
@@ -863,6 +887,70 @@ def test_add_content_to_existing_section(self, model_card):
assert num_subsection_before == num_subsection_after
assert section.content == "sklearn FTW"
+ def test_add_plain_section_works(self, model_card):
+ # It is allowed to add a *Section object, but it's not documented and
+ # users should normally not use that feature
+ section = Section("title may differ from section name", "some content")
+ model_card.add(
+ a_string="normal string",
+ a_section=section,
+ )
+ assert model_card.select("a_section") == section
+
+ def test_add_section_preserves_subsections(self, model_card):
+ # As explained in the previous test, users can theoretically add section
+ # instances. If they override an existing section with a new section,
+ # the subsections of the existing section should be preserved.
+
+ # first let's add a section and a subsection
+ model_card.add(**{"new section": "hello", "new section/subsection": "world"})
+ assert model_card.select("new section").format() == "hello"
+ assert model_card.select("new section/subsection").format() == "world"
+
+ # now let's override the section, the subsection should be preserved
+ new_section = Section("new section", "bonjour")
+ model_card.add(**{"new section": new_section})
+ assert model_card.select("new section").format() == "bonjour"
+ assert model_card.select("new section/subsection").format() == "world"
+
+ def test_add_section_with_identical_subsection_preserves_subsections(
+ self, model_card
+ ):
+ # As explained in the previous tests, users can theoretically add
+ # section instances. If they override an existing section with a new
+ # section, the subsections of the existing section should be preserved.
+ # If the new section they add has its own subsections, and these
+ # subsections are identical to the old subsections, that should be fine.
+
+ # first let's add a section and a subsection
+ model_card.add(**{"new section": "hello", "new section/subsection": "world"})
+
+ # now let's override the section using the same subsections
+ old_subsection = model_card.select("new section").subsections
+ new_section = Section("new section", "bonjour", subsections=old_subsection)
+ model_card.add(**{"new section": new_section})
+ assert model_card.select("new section").format() == "bonjour"
+ assert model_card.select("new section/subsection").format() == "world"
+
+ def test_add_section_with_different_subsection_raises(self, model_card):
+ # This is the same as the previous test, but now the section used to
+ # override the previous section has different subsections. Now we don't
+ # know what to do and should raise. This is okay because normally, a
+ # user shouldn't add section instances anyway.
+
+ # first let's add a section and a subsection
+ model_card.add(**{"new section": "hello", "new section/subsection": "world"})
+
+ # now let's override the section using different subsections
+ new_subsection = {"new subsection": Section("subsection", "mars")}
+ new_section = Section("new section", "bonjour", subsections=new_subsection)
+
+ match = (
+ "Trying to override section 'new section' but found conflicting subsections"
+ )
+ with pytest.raises(ValueError, match=match):
+ model_card.add(**{"new section": new_section})
+
class TestDelete:
"""Deleting sections and subsections"""
@@ -952,7 +1040,7 @@ def test_add_plot(self, destination_path, model_card):
plt.plot([4, 5, 6, 7])
plt.savefig(Path(destination_path) / "fig1.png")
model_card = model_card.add_plot(fig1="fig1.png")
- plot_content = model_card.select("fig1").content.format()
+ plot_content = model_card.select("fig1").format()
assert plot_content == "![fig1](fig1.png)"
def test_add_plot_to_existing_section(self, destination_path, model_card):
@@ -961,9 +1049,27 @@ def test_add_plot_to_existing_section(self, destination_path, model_card):
plt.plot([4, 5, 6, 7])
plt.savefig(Path(destination_path) / "fig1.png")
model_card = model_card.add_plot(**{"Model description/Figure 1": "fig1.png"})
- plot_content = model_card.select("Model description/Figure 1").content.format()
+ plot_content = model_card.select("Model description/Figure 1").format()
assert plot_content == "![Figure 1](fig1.png)"
+ def test_add_plot_with_description(self, destination_path, model_card):
+ import matplotlib.pyplot as plt
+
+ plt.plot([4, 5, 6, 7])
+ plt.savefig(Path(destination_path) / "fig1.png")
+ model_card = model_card.add_plot(description="My fancy plot", fig1="fig1.png")
+ plot_content = model_card.select("fig1").format()
+ assert plot_content == "My fancy plot\n\n![fig1](fig1.png)"
+
+ def test_add_plot_with_alt_text(self, destination_path, model_card):
+ import matplotlib.pyplot as plt
+
+ plt.plot([4, 5, 6, 7])
+ plt.savefig(Path(destination_path) / "fig1.png")
+ model_card = model_card.add_plot(alt_text="the figure", fig1="fig1.png")
+ plot_content = model_card.select("fig1").format()
+ assert plot_content == "![the figure](fig1.png)"
+
class TestMetadata:
def test_adding_metadata(self, model_card):
@@ -1129,34 +1235,17 @@ def expected_lines(self):
card_repr = """
Card(
model=LinearRegression(fit_intercept=False),
- Model description/Training Procedure/...ed | | positive | False | ,
+ Model description/Training Procedure/Hyperparameters=TableSection(4x2),
Model description/Training Procedure/...,
Model Card Authors=Jane Doe,
- Figures/ROC='ROC.png',
- Figures/Confusion matrix='confusion_matrix.jpg',
+ Figures/ROC=PlotSection(ROC.png),
+ Figures/Confusion matrix=PlotSection(confusion_matrix.jpg),
Model Description=A description,
- Search Results=Table(3x2),
+ Search Results=TableSection(3x2),
)
"""
expected = textwrap.dedent(card_repr).strip()
lines = expected.split("\n")
-
- # TODO: remove when dropping sklearn v0.24 and when dropping v1.1 and
- # below. This is because the "normalize" parameter was changed after
- # v0.24 will be removed completely in sklearn v1.2.
- major, minor, *_ = sklearn.__version__.split(".")
- if int(major) < 1:
- # v0.24: "deprecated" -> "False"
- lines[2] = (
- " Model description/Training Procedure/...se | | positive | False | "
- ","
- )
- elif int(minor) >= 2:
- # >= v1.2: remove argument completely
- lines[2] = (
- " Model description/Training Procedure/... | | | positive | False | "
- ","
- )
return lines
@pytest.mark.parametrize("meth", [repr, str])
@@ -1208,7 +1297,7 @@ def test_without_model_attribute(self, card: Card, meth, expected_lines):
@pytest.mark.parametrize("meth", [repr, str])
def test_with_metadata(self, card: Card, meth, expected_lines):
- metadata = CardData(
+ metadata = ModelCardData(
language="fr",
license="bsd",
library_name="sklearn",
@@ -1299,30 +1388,42 @@ def test_load_model_file_not_found(self, meth):
class TestPlotSection:
def test_format_path_is_str(self):
- section = PlotSection(alt_text="some title", path="path/plot.png")
+ section = PlotSection(
+ title="", content="", alt_text="some title", path="path/plot.png"
+ )
expected = "![some title](path/plot.png)"
assert section.format() == expected
def test_format_path_is_pathlib(self):
- section = PlotSection(alt_text="some title", path=Path("path") / "plot.png")
+ section = PlotSection(
+ title="", content="", alt_text="some title", path=Path("path") / "plot.png"
+ )
expected = f"![some title](path{os.path.sep}plot.png)"
assert section.format() == expected
@pytest.mark.parametrize("meth", [str, repr])
def test_str_and_repr(self, meth):
- section = PlotSection(alt_text="some title", path="path/plot.png")
- expected = "'path/plot.png'"
+ section = PlotSection(
+ title="", content="", alt_text="some title", path="path/plot.png"
+ )
+ expected = "PlotSection(path/plot.png)"
assert meth(section) == expected
def test_str(self):
- section = PlotSection(alt_text="some title", path="path/plot.png")
- expected = "'path/plot.png'"
+ section = PlotSection(
+ title="", content="", alt_text="some title", path="path/plot.png"
+ )
+ expected = "PlotSection(path/plot.png)"
assert str(section) == expected
@pytest.mark.parametrize("folded", [True, False])
def test_folded(self, folded):
section = PlotSection(
- alt_text="some title", path="path/plot.png", folded=folded
+ title="",
+ content="",
+ alt_text="some title",
+ path="path/plot.png",
+ folded=folded,
)
output = section.format()
if folded:
@@ -1330,6 +1431,10 @@ def test_folded(self, folded):
else:
assert "<details>" not in output
+ def test_add_with_description(self):
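+ # FIXME: placeholder that always passes; no assertion for a PlotSection with a description has been written yet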
+ pass
+
class TestTableSection:
@pytest.fixture
@@ -1337,7 +1442,7 @@ def table_dict(self):
return {"split": [1, 2, 3], "score": [4, 5, 6]}
def test_table_is_dict(self, table_dict):
- section = TableSection(table=table_dict)
+ section = TableSection(title="", content="", table=table_dict)
expected = """| split | score |
|---------|---------|
| 1 | 4 |
@@ -1348,7 +1453,7 @@ def test_table_is_dict(self, table_dict):
def test_table_is_dataframe(self, table_dict):
pd = pytest.importorskip("pandas")
df = pd.DataFrame(table_dict)
- section = TableSection(table=df)
+ section = TableSection(title="", content="", table=df)
expected = """| split | score |
|---------|---------|
| 1 | 4 |
@@ -1358,16 +1463,16 @@ def test_table_is_dataframe(self, table_dict):
@pytest.mark.parametrize("meth", [str, repr])
def test_str_and_repr_table_is_dict(self, table_dict, meth):
- section = TableSection(table=table_dict)
- expected = "Table(3x2)"
+ section = TableSection(title="", content="", table=table_dict)
+ expected = "TableSection(3x2)"
assert meth(section) == expected
@pytest.mark.parametrize("meth", [str, repr])
def test_str_and_repr_table_is_dataframe(self, table_dict, meth):
pd = pytest.importorskip("pandas")
df = pd.DataFrame(table_dict)
- section = TableSection(table=df)
- expected = "Table(3x2)"
+ section = TableSection(title="", content="", table=df)
+ expected = "TableSection(3x2)"
assert meth(section) == expected
@pytest.mark.parametrize("table", [{}, "pandas"])
@@ -1379,7 +1484,7 @@ def test_raise_error_empty_table(self, table):
msg = "Trying to add table with no columns"
with pytest.raises(ValueError, match=msg):
- TableSection(table=table)
+ TableSection(title="", content="", table=table)
@pytest.mark.parametrize("table", [{"col0": []}, "pandas"])
def test_table_with_no_rows_works(self, table):
@@ -1388,17 +1493,17 @@ def test_table_with_no_rows_works(self, table):
pd = pytest.importorskip("pandas")
table = pd.DataFrame(data=[], columns=["col0"])
- TableSection(table=table).format() # no error raised
+ TableSection(title="", content="", table=table).format() # no error raised
def test_pandas_not_installed(self, table_dict, pandas_not_installed):
# use pandas_not_installed fixture from conftest.py to pretend that
# pandas is not installed
- section = TableSection(table=table_dict)
+ section = TableSection(title="", content="", table=table_dict)
assert section._is_pandas_df is False
@pytest.mark.parametrize("folded", [True, False])
def test_folded(self, table_dict, folded):
- section = TableSection(table=table_dict, folded=folded)
+ section = TableSection(title="", content="", table=table_dict, folded=folded)
output = section.format()
if folded:
assert "<details>" in output
@@ -1429,7 +1534,7 @@ def __repr__(self) -> str:
line breaks
""",
]
- section = TableSection(table=table_dict)
+ section = TableSection(title="", content="", table=table_dict)
expected = """| split | score | with break |
|-|-|-|
| 1 | 4 | obj
with lb |
@@ -1442,6 +1547,19 @@ def __repr__(self) -> str:
result = _strip_multiple_chars(result, "-")
assert result == expected
+ def test_add_table_with_description(self, model_card, table_dict):
+ model_card.add_table(description="My fancy table", **{"The table": table_dict})
+ section = model_card.select("The table")
+ content = section.format()
+ expected = """My fancy table
+
+| split | score |
+|---------|---------|
+| 1 | 4 |
+| 2 | 5 |
+| 3 | 6 |"""
+ assert content == expected
+
class TestCustomTemplate:
@pytest.fixture
@@ -1461,17 +1579,17 @@ def card(self, template):
def test_add_model_plot(self, card):
card.add_model_plot(section="Model/Model plot")
- content = card.select("Model/Model plot").content
+ content = card.select("Model/Model plot").format()
assert "LinearRegression" in content
def test_add_hyperparams(self, card):
card.add_hyperparams(section="Model/Hyperparams")
- content = card.select("Model/Hyperparams").content
+ content = card.select("Model/Hyperparams").format()
assert "fit_intercept" in content
def test_add_metrics(self, card):
card.add_metrics(accuracy=0.1, section="Model/Metrics")
- content = card.select("Model/Metrics").content
+ content = card.select("Model/Metrics").format()
assert "accuracy" in content
assert "0.1" in content