diff --git a/skops/card/_markup.py b/skops/card/_markup.py index 32f94b36..2e42a25c 100644 --- a/skops/card/_markup.py +++ b/skops/card/_markup.py @@ -266,7 +266,7 @@ def _table(self, item) -> str: data_transposed = zip(*body) table = {key: val for key, val in zip(columns, data_transposed)} - res = TableSection(table).format() + res = TableSection(title="", content="", table=table).format() return res def _parse_div(self, item) -> str: diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 273d78c3..0cb642d2 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field from pathlib import Path from reprlib import Repr -from typing import Any, Iterator, Literal, Protocol, Sequence, Union +from typing import Any, Iterator, Literal, Sequence, Union import joblib from huggingface_hub import ModelCardData @@ -58,66 +58,6 @@ def _clean_table(table: str) -> str: return table -@dataclass -class PlotSection: - """Adds a link to a figure to the model card""" - - alt_text: str - path: str | Path - folded: bool = False - - def format(self) -> str: - text = f"![{self.alt_text}]({self.path})" - return wrap_as_details(text, folded=self.folded) - - def __repr__(self) -> str: - return repr(self.path) - - -@dataclass -class TableSection: - """Adds a table to the model card""" - - table: Mapping[str, Sequence[Any]] - folded: bool = False - - def __post_init__(self) -> None: - try: - import pandas as pd - - self._is_pandas_df = isinstance(self.table, pd.DataFrame) - except ImportError: - self._is_pandas_df = False - - if self._is_pandas_df: - ncols = len(self.table.columns) # type: ignore - else: - ncols = len(self.table) - if ncols == 0: - raise ValueError("Trying to add table with no columns") - - def format(self) -> str: - if self._is_pandas_df: - headers = self.table.columns # type: ignore - else: - headers = self.table.keys() - - table = _clean_table( - tabulate(self.table, tablefmt="github", headers=headers, showindex=False) - ) - return wrap_as_details(table, folded=self.folded) - - def __repr__(self) -> str: - if self._is_pandas_df: - nrows, ncols = self.table.shape # type: ignore - else: - # table cannot be empty, so no checks needed here - ncols = len(self.table) - key = next(iter(self.table.keys())) - nrows = len(self.table[key]) - return f"Table({nrows}x{ncols})" - - def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData: """Construct a ``ModelCardData`` object from a ``config.json`` file. @@ -244,7 +184,7 @@ class Section: """Building block of the model card. The model card is represented internally as a dict with keys being strings - and values being Sections. The key is identical to the section title. + and values being ``Section``s. The key is identical to the section title. Additionally, the section may hold content in the form of strings (can be an empty string) or a ``Formattable``, which is simply an object with a @@ -261,7 +201,7 @@ class Section: """ title: str - content: Formattable | str + content: str subsections: dict[str, Section] = field(default_factory=dict) visible: bool = True @@ -279,13 +219,14 @@ def select(self, key: str) -> Section: ------- section : Section A dataclass containing all information relevant to the selected - section. Those are the title, the content, and subsections (in a - dict). + section. Those are the title, the content, subsections (in a dict), + and additional fields that depend on the type of section. Raises ------ KeyError If the given section name was not found, a ``KeyError`` is raised. + """ section_names = split_subsection_names(key) # check that no section name is empty @@ -293,15 +234,97 @@ def select(self, key: str) -> Section: msg = f"Section name cannot be empty but got '{key}'" raise KeyError(msg) - section = self + section: Section = self for section_name in section_names: section = section.subsections[section_name] return section + def format(self) -> str: + return self.content + + def __repr__(self) -> str: + """Generates the ``repr`` of this section. + + ``repr`` determines how the content of this section is shown in the + Card's repr. + """ + return self.content + + +@dataclass +class PlotSection(Section): + """Adds a link to a figure to the model card""" + + path: str | Path = "" + alt_text: str = "" + folded: bool = False + + def __post_init__(self) -> None: + if not self.path: + raise TypeError(f"{self.__class__.__name__} requires a path") -class Formattable(Protocol): def format(self) -> str: - ... # pragma: no cover + # if no alt text provided, fall back to figure path + alt_text = self.alt_text or self.path + text = f"![{alt_text}]({self.path})" + val = wrap_as_details(text, folded=self.folded) + if self.content: + val = f"{self.content}\n\n{val}" + return val + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.path})" + + +@dataclass +class TableSection(Section): + """Adds a table to the model card""" + + table: Mapping[str, Sequence[Any]] = field(default_factory=dict) + folded: bool = False + + def __post_init__(self) -> None: + self._check_table() + + def _check_table(self) -> None: + try: + import pandas as pd + + self._is_pandas_df = isinstance(self.table, pd.DataFrame) + except ImportError: + self._is_pandas_df = False + + if self._is_pandas_df: + ncols = len(self.table.columns) # type: ignore + else: + ncols = len(self.table) + if ncols == 0: + raise ValueError("Trying to add table with no columns") + + def format(self) -> str: + if self._is_pandas_df: + headers = self.table.columns # type: ignore + else: + headers = self.table.keys() + + table = _clean_table( + tabulate(self.table, tablefmt="github", headers=headers, showindex=False) + ) + val = wrap_as_details(table, folded=self.folded) + + if self.content: + val = f"{self.content}\n\n{val}" + return val + + def __repr__(self) -> str: + if self._is_pandas_df: + nrows, ncols = self.table.shape # type: ignore + else: + # table cannot be empty, so no checks needed here + ncols = len(self.table) + key = next(iter(self.table.keys())) + nrows = len(self.table[key]) + return f"{self.__class__.__name__}({nrows}x{ncols})" def _load_model(model: Any, trusted=False) -> Any: @@ -443,10 +466,10 @@ class Card: model=LogisticRegression(random_state=0, solver='liblinear'), metadata.license=mit, Model description=This is the best model, - Model description/Training Procedure/... | | warm_start | False | , + Model description/Training Procedure/Hyperparameters=TableSection(15x2), Model description/Training Procedure/..., - Model description/Evaluation Results=...ccuracy | 0.96 | | f1 score | 0.96 |, - Model description/Confusion Matrix=...confusion_matrix.png'), + Model description/Evaluation Results=TableSection(2x2), + Model description/Confusion Matrix=Pl...confusion_matrix.png), Model description/Model name=This model is called Bob, A new section=Please rate my model, ) @@ -513,7 +536,7 @@ def get_model(self) -> Any: # model has changed, but at the moment we have no way of knowing that return model - def add(self, **kwargs: str | Formattable) -> Self: + def add(self, **kwargs: str) -> Self: """Add new section(s) to the model card. Add one or multiple sections to the model card. The section names are @@ -685,7 +708,7 @@ def delete(self, key: str | Sequence[str]) -> None: parent_section = self._select(subsection_names, create=False) del parent_section[leaf_node_name] - def _add_single(self, key: str, val: Formattable | str) -> Section: + def _add_single(self, key: str, val: str | Section) -> Section: """Add a single section. If the (sub)section does not exist, it is created. Otherwise, the @@ -696,8 +719,10 @@ def _add_single(self, key: str, val: Formattable | str) -> Section: key: str The name of the (sub)section. - val: str or Formattable - The value to assign to the (sub)section. + val: str or Section + The value to assign to the (sub)section. If this is already a + section, leave it as it is. If it's a string, create a + :class:`skops.card._model_card.Section`. Returns ------- @@ -708,13 +733,27 @@ def _add_single(self, key: str, val: Formattable | str) -> Section: *subsection_names, leaf_node_name = split_subsection_names(key) section = self._select(subsection_names) - if leaf_node_name in section: - # entry exists, only overwrite content - section[leaf_node_name].content = val + if isinstance(val, str): + # val is a str, create a Section + new_section = Section(title=leaf_node_name, content=val) else: - # entry does not exist, create a new one - section[leaf_node_name] = Section(title=leaf_node_name, content=val) + # val is already a section and can be used as is + new_section = val + if leaf_node_name in section: + # entry exists, preserve its subsections + old_section = section[leaf_node_name] + if new_section.subsections and ( + new_section.subsections != old_section.subsections + ): + msg = ( + f"Trying to override section '{leaf_node_name}' but found " + "conflicting subsections." + ) + raise ValueError(msg) + new_section.subsections = old_section.subsections + + section[leaf_node_name] = new_section return section[leaf_node_name] def add_model_plot( @@ -764,12 +803,14 @@ def add_model_plot( if self.template == Templates.skops.value: description = "The model plot is below." - self._add_model_plot(self.get_model(), section=section, description=description) + self._add_model_plot( + self.get_model(), section_name=section, description=description + ) return self def _add_model_plot( - self, model: Any, section: str, description: str | None + self, model: Any, section_name: str, description: str | None ) -> None: """Add model plot section @@ -787,7 +828,10 @@ def _add_model_plot( else: content = model_plot_div - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = Section(title=title, content=content) + self._add_single(section_name, section) def add_hyperparams( self, section: str | None = None, description: str | None = None @@ -826,42 +870,27 @@ def add_hyperparams( description = "The model is trained with below hyperparameters." self._add_hyperparams( - self.get_model(), section=section, description=description + self.get_model(), section_name=section, description=description ) return self def _add_hyperparams( - self, model: Any, section: str, description: str | None + self, model: Any, section_name: str, description: str | None ) -> None: """Add hyperparameter section. The model should be a loaded sklearn model, not a path. """ - hyperparameter_dict = model.get_params(deep=True) - table = _clean_table( - tabulate( - list(hyperparameter_dict.items()), - headers=["Hyperparameter", "Value"], - tablefmt="github", - ) - ) - table_folded = textwrap.dedent( - """ -
- Click to expand - - {} + params = model.get_params(deep=True) + table = {"Hyperparameter": list(params.keys()), "Value": list(params.values())} -
""" - ).format(table) - - if description: - content = f"{description}\n{table_folded}" - else: - content = table_folded - - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = TableSection( + title=title, content=description, table=table, folded=True + ) + self._add_single(section_name, section) def add_get_started_code( self, @@ -947,7 +976,7 @@ def add_get_started_code( def _add_get_started_code( self, - section: str, + section_name: str, file_name: str, model_format: Literal["pickle", "skops"], description: str | None, @@ -965,9 +994,18 @@ def _add_get_started_code( else: content = code - self._add_single(section, content) + title = split_subsection_names(section_name)[-1] + section = Section(title=title, content=content) + self._add_single(section_name, section) - def add_plot(self, *, folded=False, **kwargs: str) -> Self: + def add_plot( + self, + *, + description: str | None = None, + alt_text: str | None = None, + folded=False, + **kwargs: str, + ) -> Self: """Add plots to the model card. The plot should be saved on the file system and the path passed as @@ -975,6 +1013,20 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self: Parameters ---------- + description: str or None (default=None) + If a string is passed as description, it is shown before the figure. + If multiple figures are added with one call, they all get the same + description. To add multiple figures with different descriptions, + call this method multiple times. + + alt_text: : str or None (default=None) + If a string is passed as ``alt_text``, it is used as the alternative + text for the figure (i.e. what is shown if the figure cannot be + rendered). If this argument is ``None``, the alt_text will just be + the same as the section title. If multiple figures are added with + one call, they all get the same alt text. To add multiple figures + with different alt texts, call this method multiple times. + folded: bool (default=False) If set to ``True``, the plot will be enclosed in a ``details`` tag. That means the content is folded by default and users have to click @@ -994,14 +1046,26 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self: Card object. """ + description = description or "" for section_name, plot_path in kwargs.items(): - plot_name = split_subsection_names(section_name)[-1] - section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) + title = split_subsection_names(section_name)[-1] + alt_text = alt_text or title + section = PlotSection( + title=title, + content=description, + alt_text=alt_text, + path=plot_path, + folded=folded, + ) self._add_single(section_name, section) return self def add_table( - self, *, folded: bool = False, **kwargs: dict["str", list[Any]] + self, + *, + description: str | None = None, + folded: bool = False, + **kwargs: dict["str", list[Any]], ) -> Self: """Add a table to the model card. @@ -1027,6 +1091,12 @@ def add_table( Parameters ---------- + description: str or None (default=None) + If a string is passed as description, it is shown before the table. + If multiple tables are added with one call, they all get the same + description. To add multiple tables with different descriptions, + call this method multiple times. + folded: bool (default=False) If set to ``True``, the table will be enclosed in a ``details`` tag. That means the content is folded by default and users have to click @@ -1047,8 +1117,11 @@ def add_table( Card object. """ + description = description or "" for key, val in kwargs.items(): - section = TableSection(table=val, folded=folded) + section = TableSection( + title=key, content=description, table=val, folded=folded + ) self._add_single(key, section) return self @@ -1098,7 +1171,7 @@ def add_metrics( "the evaluation results." ) self._metrics.update(kwargs) - self._add_metrics(section, self._metrics, description=description) + self._add_metrics(section, description=description, metrics=self._metrics) return self def add_permutation_importances( @@ -1108,6 +1181,7 @@ def add_permutation_importances( plot_file: str = "permutation_importances.png", plot_name: str = "Permutation Importances", overwrite: bool = False, + description: str | None = None, ) -> Self: """Plots permutation importance and saves it to model card. @@ -1129,6 +1203,9 @@ def add_permutation_importances( Whether to overwrite the permutation importance plot file, if a plot by that name already exists. + description : str | None (default=None) + An optional description to be added before the plot. + Returns ------- self : object @@ -1151,31 +1228,31 @@ def add_permutation_importances( ax.set_title(plot_name) ax.set_xlabel("Decrease in Score") plt.savefig(plot_file) - self.add_plot(**{plot_name: plot_file}) + self.add_plot(description=description, **{plot_name: plot_file}) return self def _add_metrics( self, - section: str, - metrics: dict[str, str | float | int], + section_name: str, description: str | None, + metrics: dict[str, str | float | int], ) -> None: """Add metrics to the Evaluation Results section.""" if self._metrics: - data_transposed = zip(*self._metrics.items()) # make column oriented - inp = {key: val for key, val in zip(["Metric", "Value"], data_transposed)} - table = TableSection(inp).format() + # transpose from row oriented to column oriented + data_transposed = zip(*self._metrics.items()) + table = { + key: list(val) for key, val in zip(["Metric", "Value"], data_transposed) + } else: # create empty table - table = TableSection({"Metric": [], "Value": []}).format() + table = {"Metric": [], "Value": []} - if description: - content = f"{description}\n\n{table}" - else: - content = table - - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = TableSection(title=title, content=description, table=table) + self._add_single(section_name, section) def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]: """Yield metadata in yaml format""" @@ -1192,24 +1269,21 @@ def _generate_content( content. """ - for val in data.values(): - if not val.visible: + for section in data.values(): + if not section.visible: continue - title = f"{depth * '#'} {val.title}" + title = f"{depth * '#'} {section.title}" yield title - if isinstance(val.content, str): - yield val.content - else: # is a Formattable - yield val.content.format() + yield section.format() - if val.subsections: - yield from self._generate_content(val.subsections, depth=depth + 1) + if section.subsections: + yield from self._generate_content(section.subsections, depth=depth + 1) def _iterate_content( self, data: dict[str, Section], parent_section: str = "" - ) -> Iterator[tuple[str, Formattable | str]]: + ) -> Iterator[tuple[str, Section]]: """Yield tuples of title and (non-formatted) content.""" for val in data.values(): if parent_section: @@ -1217,7 +1291,7 @@ def _iterate_content( else: title = val.title - yield title, val.content + yield title, val if val.subsections: yield from self._iterate_content(val.subsections, parent_section=title) @@ -1252,15 +1326,14 @@ def __repr__(self) -> str: # repr for contents content_reprs = [] - for title, content in self._iterate_content(self._data): + for title, section in self._iterate_content(self._data): + content = section.format() if not content: continue - if isinstance(content, str) and content.rstrip("`").rstrip().endswith( - CONTENT_PLACEHOLDER - ): + if content.rstrip("`").rstrip().endswith(CONTENT_PLACEHOLDER): # if content is just some default text, no need to show it continue - content_reprs.append(self._format_repr(f"{title}={content},")) + content_reprs.append(self._format_repr(f"{title}={section},")) content_repr = "\n".join(content_reprs) # combine all parts diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 9ad94277..4b74d724 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -8,7 +8,7 @@ import numpy as np import pytest import sklearn -from huggingface_hub import CardData, metadata_load +from huggingface_hub import ModelCardData, metadata_load from sklearn.datasets import load_iris from sklearn.inspection import permutation_importance from sklearn.linear_model import LinearRegression, LogisticRegression @@ -20,6 +20,7 @@ from skops.card._model_card import ( SKOPS_TEMPLATE, PlotSection, + Section, TableSection, _load_model, ) @@ -263,7 +264,7 @@ def expected(self): def test_default(self, model_card, expected): result = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() # remove multiple whitespaces and dashes, as they're not important and may # differ depending on OS @@ -273,7 +274,7 @@ def test_default(self, model_card, expected): def test_other_section(self, model_card, expected): model_card.add_hyperparams(section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() # remove multiple whitespaces and dashes, as they're not important and may # differ depending on OS @@ -285,7 +286,7 @@ def test_other_description(self, model_card, expected): model_card.add_hyperparams(description="Awesome hyperparams") result = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() assert result.startswith("Awesome hyperparams") @pytest.mark.parametrize("template", CUSTOM_TEMPLATES) @@ -305,9 +306,9 @@ def test_add_twice(self, model_card): # of sense text1 = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() model_card.add_hyperparams(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 @@ -321,7 +322,7 @@ def get_params(self, deep=False): model_card = Card(EstimatorWithLbInParams()) section_name = "Model description/Training Procedure/Hyperparameters" - text_hyperparams = model_card.select(section_name).content + text_hyperparams = model_card.select(section_name).format() # remove multiple whitespaces, as they're not important text_cleaned = _strip_multiple_chars(text_hyperparams, " ") @@ -333,13 +334,13 @@ class TestAddMetrics: def test_default(self, model_card): # by default, don't add a table, as there are no metrics - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = "[More Information Needed]" assert result == expected def test_empty_metrics_table(self, model_card): model_card.add_metrics() - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -354,7 +355,7 @@ def test_multiple_metrics(self, model_card): f1=0.1, # float awesomeness=123, # int ) - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -368,7 +369,7 @@ def test_multiple_metrics(self, model_card): def test_other_section(self, model_card): model_card.add_metrics(accuracy=0.9, section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -380,7 +381,7 @@ def test_other_section(self, model_card): def test_other_description(self, model_card): model_card.add_metrics(accuracy=0.9, description="Awesome metrics") - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() assert result.startswith("Awesome metrics") @pytest.mark.parametrize("template", CUSTOM_TEMPLATES) @@ -398,100 +399,123 @@ def test_add_twice(self, model_card): # it's possible to add the section twice, even if it doesn't make a lot # of sense model_card.add_metrics(accuracy=0.9) - text1 = model_card.select("Model description/Evaluation Results").content + text1 = model_card.select("Model description/Evaluation Results").format() model_card.add_metrics(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 -def test_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) +class TestAddPermutationImportance: + @pytest.fixture + def importances(self, iris_estimator, iris_data): + X, y = iris_data + result = permutation_importance( + iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 + ) + return result - model_card.add_permutation_importances( - result, - X.columns, - Path(destination_path) / "importance.png", - "Permutation Importance", - ) - temp_path = Path(destination_path) / "importance.png" - assert f"![Permutation Importance]({temp_path}" in model_card.render() + def test_permutation_importances( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=Path(destination_path) / "importance.png", + plot_name="Permutation Importance", + ) + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importance") + expected = f"![Permutation Importance]({temp_path})" + assert section.format() == expected + def test_multiple_permutation_importances( + self, iris_data, iris_estimator, importances, model_card, destination_path + ): + X, y = iris_data + model_card.add_permutation_importances( + importances, X.columns, plot_file=Path(destination_path) / "importance.png" + ) -def test_multiple_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - model_card.add_permutation_importances( - result, X.columns, plot_file=Path(destination_path) / "importance.png" - ) - f1 = make_scorer(f1_score, average="micro") - result = permutation_importance( - iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2 - ) - model_card.add_permutation_importances( - result, - X.columns, - plot_file=Path(destination_path) / "f1_importance.png", - plot_name="Permutation Importance on f1", - ) - # check for default one - temp_path = Path(destination_path) / "importance.png" - assert f"![Permutation Importances]({temp_path}" in model_card.render() - # check for F1 - temp_path_f1 = Path(destination_path) / "f1_importance.png" - assert f"![Permutation Importance on f1]({temp_path_f1}" in model_card.render() + f1 = make_scorer(f1_score, average="micro") + importances_f1 = permutation_importance( + iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2 + ) + model_card.add_permutation_importances( + importances_f1, + columns=X.columns, + plot_file=Path(destination_path) / "f1_importance.png", + plot_name="Permutation Importance on f1", + ) + # check for default one + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importances") + expected = f"![Permutation Importances]({temp_path})" + assert section.format() == expected -def test_duplicate_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - plot_path = os.path.join(destination_path, "importance.png") - model_card.add_permutation_importances(result, X.columns, plot_file=plot_path) - with pytest.raises( - ValueError, - match=( - "already exists. Set `overwrite` to `True` or pass a" - " different filename for the plot." - ), + # check for F1 + temp_path_f1 = Path(destination_path) / "f1_importance.png" + section = model_card.select("Permutation Importance on f1") + expected = f"![Permutation Importance on f1]({temp_path_f1})" + assert section.format() == expected + + def test_duplicate_permutation_importances( + self, iris_data, importances, model_card, destination_path ): + X, _ = iris_data + plot_path = os.path.join(destination_path, "importance.png") + model_card.add_permutation_importances( + importances, X.columns, plot_file=plot_path + ) + with pytest.raises( + ValueError, + match=( + "already exists. Set `overwrite` to `True` or pass a" + " different filename for the plot." + ), + ): + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=plot_path, + plot_name="Permutation Importance on f1", + ) + + def test_duplicate_permutation_importances_overwrite( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + plot_path = os.path.join(destination_path, "importance.png") + model_card.add_permutation_importances( + importances, X.columns, plot_file=plot_path + ) + model_card.add_permutation_importances( - result, - X.columns, + importances, + columns=X.columns, plot_file=plot_path, plot_name="Permutation Importance on f1", + overwrite=True, ) + section = model_card.select("Permutation Importance on f1") + expected = f"![Permutation Importance on f1]({plot_path})" + assert section.format() == expected - -def test_duplicate_permutation_importances_overwrite( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - plot_path = os.path.join(destination_path, "importance.png") - model_card.add_permutation_importances(result, X.columns, plot_file=plot_path) - - model_card.add_permutation_importances( - result, - X.columns, - plot_file=plot_path, - plot_name="Permutation Importance on f1", - overwrite=True, - ) - assert f"![Permutation Importance on f1]({plot_path}" in model_card.render() + def test_permutation_importances_with_description( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=Path(destination_path) / "importance.png", + description="Very important", + ) + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importances") + expected = f"Very important\n\n![Permutation Importances]({temp_path})" + assert section.format() == expected class TestAddGetStartedCode: @@ -539,7 +563,7 @@ def model_card_skops(self, metadata_skops): def test_default_pickle(self, model_card): # by default, don't add a table, as there are no metrics - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -556,7 +580,7 @@ def test_default_pickle(self, model_card): def test_default_skops(self, model_card_skops): # by default, don't add a table, as there are no metrics - result = model_card_skops.select("How to Get Started with the Model").content + result = model_card_skops.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -590,18 +614,18 @@ def to_dict(self): def test_other_section(self, model_card): model_card.add_get_started_code(section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() expected = "Use the code below to get started with the model." assert result.startswith(expected) def test_other_description(self, model_card): model_card.add_get_started_code(description="Awesome code") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() assert result.startswith("Awesome code") def test_other_filename(self, model_card): model_card.add_get_started_code(file_name="foobar.pkl") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -618,7 +642,7 @@ def test_other_filename(self, model_card): def test_other_model_format(self, model_card): model_card.add_get_started_code(model_format="skops") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -670,9 +694,9 @@ def test_custom_template_no_section_raises(self, template): def test_add_twice(self, model_card): # it's possible to add the section twice, even if it doesn't make a lot # of sense - text1 = model_card.select("How to Get Started with the Model").content + text1 = model_card.select("How to Get Started with the Model").format() model_card.add_get_started_code(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 @@ -863,6 +887,70 @@ def test_add_content_to_existing_section(self, model_card): assert num_subsection_before == num_subsection_after assert section.content == "sklearn FTW" + def test_add_plain_section_works(self, model_card): + # It is allowed to add a *Section object, but it's not documented and + # users should normally not use that feature + section = Section("title may differ from section name", "some content") + model_card.add( + a_string="normal string", + a_section=section, + ) + assert model_card.select("a_section") == section + + def test_add_section_preserves_subsections(self, model_card): + # As explained in the previous test, users can theoretically add section + # instances. If they override an existing section with a new section, + # the subsections of the existing section should be preserved. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + assert model_card.select("new section").format() == "hello" + assert model_card.select("new section/subsection").format() == "world" + + # now let's override the section, the subsection should be preserved + new_section = Section("new section", "bonjour") + model_card.add(**{"new section": new_section}) + assert model_card.select("new section").format() == "bonjour" + assert model_card.select("new section/subsection").format() == "world" + + def test_add_section_with_identical_subsection_preserves_subsections( + self, model_card + ): + # As explained in the previous tests, users can theoretically add + # section instances. If they override an existing section with a new + # section, the subsections of the existing section should be preserved. + # If the new section they add has its own subsections, and these + # subsections are identical to the old subsections, that should be fine. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + + # now let's override the section using the same subsections + old_subsection = model_card.select("new section").subsections + new_section = Section("new section", "bonjour", subsections=old_subsection) + model_card.add(**{"new section": new_section}) + assert model_card.select("new section").format() == "bonjour" + assert model_card.select("new section/subsection").format() == "world" + + def test_add_section_with_different_subsection_raises(self, model_card): + # This is the same as the previous test, but now the section used to + # override the previous section has different subsections. Now we don't + # know what to do and should raise. This is okay because normally, a + # user shouldn't add section instances anyway. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + + # now let's override the section using different subsections + new_subsection = {"new subsection": Section("subsection", "mars")} + new_section = Section("new section", "bonjour", subsections=new_subsection) + + match = ( + "Trying to override section 'new section' but found conflicting subsections" + ) + with pytest.raises(ValueError, match=match): + model_card.add(**{"new section": new_section}) + class TestDelete: """Deleting sections and subsections""" @@ -952,7 +1040,7 @@ def test_add_plot(self, destination_path, model_card): plt.plot([4, 5, 6, 7]) plt.savefig(Path(destination_path) / "fig1.png") model_card = model_card.add_plot(fig1="fig1.png") - plot_content = model_card.select("fig1").content.format() + plot_content = model_card.select("fig1").format() assert plot_content == "![fig1](fig1.png)" def test_add_plot_to_existing_section(self, destination_path, model_card): @@ -961,9 +1049,27 @@ def test_add_plot_to_existing_section(self, destination_path, model_card): plt.plot([4, 5, 6, 7]) plt.savefig(Path(destination_path) / "fig1.png") model_card = model_card.add_plot(**{"Model description/Figure 1": "fig1.png"}) - plot_content = model_card.select("Model description/Figure 1").content.format() + plot_content = model_card.select("Model description/Figure 1").format() assert plot_content == "![Figure 1](fig1.png)" + def test_add_plot_with_description(self, destination_path, model_card): + import matplotlib.pyplot as plt + + plt.plot([4, 5, 6, 7]) + plt.savefig(Path(destination_path) / "fig1.png") + model_card = model_card.add_plot(description="My fancy plot", fig1="fig1.png") + plot_content = model_card.select("fig1").format() + assert plot_content == "My fancy plot\n\n![fig1](fig1.png)" + + def test_add_plot_with_alt_text(self, destination_path, model_card): + import matplotlib.pyplot as plt + + plt.plot([4, 5, 6, 7]) + plt.savefig(Path(destination_path) / "fig1.png") + model_card = model_card.add_plot(alt_text="the figure", fig1="fig1.png") + plot_content = model_card.select("fig1").format() + assert plot_content == "![the figure](fig1.png)" + class TestMetadata: def test_adding_metadata(self, model_card): @@ -1129,34 +1235,17 @@ def expected_lines(self): card_repr = """ Card( model=LinearRegression(fit_intercept=False), - Model description/Training Procedure/...ed | | positive | False | , + Model description/Training Procedure/Hyperparameters=TableSection(4x2), Model description/Training Procedure/..., Model Card Authors=Jane Doe, - Figures/ROC='ROC.png', - Figures/Confusion matrix='confusion_matrix.jpg', + Figures/ROC=PlotSection(ROC.png), + Figures/Confusion matrix=PlotSection(confusion_matrix.jpg), Model Description=A description, - Search Results=Table(3x2), + Search Results=TableSection(3x2), ) """ expected = textwrap.dedent(card_repr).strip() lines = expected.split("\n") - - # TODO: remove when dropping sklearn v0.24 and when dropping v1.1 and - # below. This is because the "normalize" parameter was changed after - # v0.24 will be removed completely in sklearn v1.2. - major, minor, *_ = sklearn.__version__.split(".") - if int(major) < 1: - # v0.24: "deprecated" -> "False" - lines[2] = ( - " Model description/Training Procedure/...se | | positive | False | " - "," - ) - elif int(minor) >= 2: - # >= v1.2: remove argument completely - lines[2] = ( - " Model description/Training Procedure/... | | | positive | False | " - "," - ) return lines @pytest.mark.parametrize("meth", [repr, str]) @@ -1208,7 +1297,7 @@ def test_without_model_attribute(self, card: Card, meth, expected_lines): @pytest.mark.parametrize("meth", [repr, str]) def test_with_metadata(self, card: Card, meth, expected_lines): - metadata = CardData( + metadata = ModelCardData( language="fr", license="bsd", library_name="sklearn", @@ -1299,30 +1388,42 @@ def test_load_model_file_not_found(self, meth): class TestPlotSection: def test_format_path_is_str(self): - section = PlotSection(alt_text="some title", path="path/plot.png") + section = PlotSection( + title="", content="", alt_text="some title", path="path/plot.png" + ) expected = "![some title](path/plot.png)" assert section.format() == expected def test_format_path_is_pathlib(self): - section = PlotSection(alt_text="some title", path=Path("path") / "plot.png") + section = PlotSection( + title="", content="", alt_text="some title", path=Path("path") / "plot.png" + ) expected = f"![some title](path{os.path.sep}plot.png)" assert section.format() == expected @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr(self, meth): - section = PlotSection(alt_text="some title", path="path/plot.png") - expected = "'path/plot.png'" + section = PlotSection( + title="", content="", alt_text="some title", path="path/plot.png" + ) + expected = "PlotSection(path/plot.png)" assert meth(section) == expected def test_str(self): - section = PlotSection(alt_text="some title", path="path/plot.png") - expected = "'path/plot.png'" + section = PlotSection( + title="", content="", alt_text="some title", path="path/plot.png" + ) + expected = "PlotSection(path/plot.png)" assert str(section) == expected @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, folded): section = PlotSection( - alt_text="some title", path="path/plot.png", folded=folded + title="", + content="", + alt_text="some title", + path="path/plot.png", + folded=folded, ) output = section.format() if folded: @@ -1330,6 +1431,10 @@ def test_folded(self, folded): else: assert "
" not in output + def test_add_with_description(self): + # FIXME + pass + class TestTableSection: @pytest.fixture @@ -1337,7 +1442,7 @@ def table_dict(self): return {"split": [1, 2, 3], "score": [4, 5, 6]} def test_table_is_dict(self, table_dict): - section = TableSection(table=table_dict) + section = TableSection(title="", content="", table=table_dict) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1348,7 +1453,7 @@ def test_table_is_dict(self, table_dict): def test_table_is_dataframe(self, table_dict): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - section = TableSection(table=df) + section = TableSection(title="", content="", table=df) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1358,16 +1463,16 @@ def test_table_is_dataframe(self, table_dict): @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr_table_is_dict(self, table_dict, meth): - section = TableSection(table=table_dict) - expected = "Table(3x2)" + section = TableSection(title="", content="", table=table_dict) + expected = "TableSection(3x2)" assert meth(section) == expected @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr_table_is_dataframe(self, table_dict, meth): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - section = TableSection(table=df) - expected = "Table(3x2)" + section = TableSection(title="", content="", table=df) + expected = "TableSection(3x2)" assert meth(section) == expected @pytest.mark.parametrize("table", [{}, "pandas"]) @@ -1379,7 +1484,7 @@ def test_raise_error_empty_table(self, table): msg = "Trying to add table with no columns" with pytest.raises(ValueError, match=msg): - TableSection(table=table) + TableSection(title="", content="", table=table) @pytest.mark.parametrize("table", [{"col0": []}, "pandas"]) def test_table_with_no_rows_works(self, table): @@ -1388,17 +1493,17 @@ def test_table_with_no_rows_works(self, table): pd = pytest.importorskip("pandas") table = pd.DataFrame(data=[], columns=["col0"]) - TableSection(table=table).format() # no error raised + TableSection(title="", content="", table=table).format() # no error raised def test_pandas_not_installed(self, table_dict, pandas_not_installed): # use pandas_not_installed fixture from conftest.py to pretend that # pandas is not installed - section = TableSection(table=table_dict) + section = TableSection(title="", content="", table=table_dict) assert section._is_pandas_df is False @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, table_dict, folded): - section = TableSection(table=table_dict, folded=folded) + section = TableSection(title="", content="", table=table_dict, folded=folded) output = section.format() if folded: assert "
" in output @@ -1429,7 +1534,7 @@ def __repr__(self) -> str: line breaks """, ] - section = TableSection(table=table_dict) + section = TableSection(title="", content="", table=table_dict) expected = """| split | score | with break | |-|-|-| | 1 | 4 | obj
with lb | @@ -1442,6 +1547,19 @@ def __repr__(self) -> str: result = _strip_multiple_chars(result, "-") assert result == expected + def test_add_table_with_description(self, model_card, table_dict): + model_card.add_table(description="My fancy table", **{"The table": table_dict}) + section = model_card.select("The table") + content = section.format() + expected = """My fancy table + +| split | score | +|---------|---------| +| 1 | 4 | +| 2 | 5 | +| 3 | 6 |""" + assert content == expected + class TestCustomTemplate: @pytest.fixture @@ -1461,17 +1579,17 @@ def card(self, template): def test_add_model_plot(self, card): card.add_model_plot(section="Model/Model plot") - content = card.select("Model/Model plot").content + content = card.select("Model/Model plot").format() assert "LinearRegression" in content def test_add_hyperparams(self, card): card.add_hyperparams(section="Model/Hyperparams") - content = card.select("Model/Hyperparams").content + content = card.select("Model/Hyperparams").format() assert "fit_intercept" in content def test_add_metrics(self, card): card.add_metrics(accuracy=0.1, section="Model/Metrics") - content = card.select("Model/Metrics").content + content = card.select("Model/Metrics").format() assert "accuracy" in content assert "0.1" in content