From 85503edd1d99252b7ea4397bcca3f37d18f1ce52 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Thu, 2 Mar 2023 16:24:09 +0100 Subject: [PATCH 1/7] [WIP] Refactor Card to lazily format sections Currently, the way model cards work is that if a user adds new content, a Section object is created and added as a (sub)section. That object contains the *formatted* content of that section as str, which is then used when rendering. So if the user adds, e.g., a table, the table is eagerly formatted to a str and during rendering, the str is simply returned. With this refactor, when a user adds new content, a specific type of section is added instead, which does not eagerly format the string. For simple text content, this is just a PlainSection which holds the content as a str, so no big change. However, for more complex content like tables or figures, a special section type is added that does not contain the final content as rendered. When the model card is rendered, the section.format() method is called to render the actual content, i.e. the formatting is now called lazily. The main benefit of this refactor is that the sections in a model card are now "aware" of what type of section they are. This allows us to more easily modify them later. E.g. let's say we want to modify the path to a figure or a row in a table: Right now, this would require some quite complex regex to modify the already formatted str. With this refactor, we can just access the attributes of the section, e.g. section.path or section.table, and modify them. A minor benefit could also be performance, because we don't render until it's strictly needed, but that's probably very minor. From the user perspective, this refactor shouldn't change anything as long as they were sticking with the official API. When calling render() or save(), the output is the same as before. Only if the users were directly interacting with sections might there be a breakage. One minor visible change this PR brings as well is that the repr of the card changes slightly. Now it indicates what type of section we're dealing with, e.g. TableSection(...), except if it's a PlainSection, which, same as previously, just returns the repr of its content. This commit is WIP because I also added new "description" parameters to add_* methods that were missing it so far. Now, if a user adds, say, a plot, they can optionally add a description for the plot too. This functionality lacks unit tests as of now. --- skops/card/_markup.py | 3 +- skops/card/_model_card.py | 376 ++++++++++++++++++++-------------- skops/card/tests/test_card.py | 151 ++++++++------ 3 files changed, 304 insertions(+), 226 deletions(-) diff --git a/skops/card/_markup.py b/skops/card/_markup.py index 32f94b36..59a2377d 100644 --- a/skops/card/_markup.py +++ b/skops/card/_markup.py @@ -266,7 +266,8 @@ def _table(self, item) -> str: data_transposed = zip(*body) table = {key: val for key, val in zip(columns, data_transposed)} - res = TableSection(table).format() + title, description = "", "" + res = TableSection(title, description, table=table).format() return res def _parse_div(self, item) -> str: diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 273d78c3..e996adad 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field from pathlib import Path from reprlib import Repr -from typing import Any, Iterator, Literal, Protocol, Sequence, Union +from typing import Any, Iterator, Literal, Sequence, Union import joblib from huggingface_hub import ModelCardData @@ -58,66 +58,6 @@ def _clean_table(table: str) -> str: return table -@dataclass -class PlotSection: - """Adds a link to a figure to the model card""" - - alt_text: str - path: str | Path - folded: bool = False - - def format(self) -> str: - text = f"![{self.alt_text}]({self.path})" - return wrap_as_details(text, folded=self.folded) - - def __repr__(self) -> str: - return repr(self.path) - - -@dataclass -class TableSection: - """Adds a table to the model card""" - - table: Mapping[str, Sequence[Any]] - folded: bool = False - - def __post_init__(self) -> None: - try: - import pandas as pd - - self._is_pandas_df = isinstance(self.table, pd.DataFrame) - except ImportError: - self._is_pandas_df = False - - if self._is_pandas_df: - ncols = len(self.table.columns) # type: ignore - else: - ncols = len(self.table) - if ncols == 0: - raise ValueError("Trying to add table with no columns") - - def format(self) -> str: - if self._is_pandas_df: - headers = self.table.columns # type: ignore - else: - headers = self.table.keys() - - table = _clean_table( - tabulate(self.table, tablefmt="github", headers=headers, showindex=False) - ) - return wrap_as_details(table, folded=self.folded) - - def __repr__(self) -> str: - if self._is_pandas_df: - nrows, ncols = self.table.shape # type: ignore - else: - # table cannot be empty, so no checks needed here - ncols = len(self.table) - key = next(iter(self.table.keys())) - nrows = len(self.table[key]) - return f"Table({nrows}x{ncols})" - - def metadata_from_config(config_path: Union[str, Path]) -> ModelCardData: """Construct a ``ModelCardData`` object from a ``config.json`` file. @@ -241,27 +181,14 @@ def _getting_started_code( @dataclass class Section: - """Building block of the model card. - - The model card is represented internally as a dict with keys being strings - and values being Sections. The key is identical to the section title. - - Additionally, the section may hold content in the form of strings (can be an - empty string) or a ``Formattable``, which is simply an object with a - ``format`` method that returns a string. + """Base (data)class for all types of sections. - The section can contain subsections, which again are dicts of - string keys and section values (the dict can be empty). Therefore, the model - card representation forms a tree structure, making use of the fact that dict - order is preserved. - - The section may also contain a ``visible`` flag, which determines if the - section will be shown when the card is rendered. + Subclasses should implement at the very least the ``.format`` method. """ title: str - content: Formattable | str + content: str subsections: dict[str, Section] = field(default_factory=dict) visible: bool = True @@ -279,13 +206,14 @@ def select(self, key: str) -> Section: ------- section : Section A dataclass containing all information relevant to the selected - section. Those are the title, the content, and subsections (in a - dict). + section. Those are the title, the content, subsections (in a dict), + and additional fields that depend on the type of section. Raises ------ KeyError If the given section name was not found, a ``KeyError`` is raised. + """ section_names = split_subsection_names(key) # check that no section name is empty @@ -293,15 +221,120 @@ def select(self, key: str) -> Section: msg = f"Section name cannot be empty but got '{key}'" raise KeyError(msg) - section = self + section: Section = self for section_name in section_names: section = section.subsections[section_name] return section + def format(self) -> str: + """Format determines how the content of this section will be rendered.""" + raise NotImplementedError + + def __repr__(self) -> str: + """repr determines how the content of this section is shown in the + Card's repr""" + return self.content + + +@dataclass(repr=False) +class PlainSection(Section): + """Building block of the model card. + + The model card is represented internally as a dict with keys being strings + and values being Sections. The key is identical to the section title. + + Additionally, the section may hold content in the form of strings (can be an + empty string) or a ``Formattable``, which is simply an object with a + ``format`` method that returns a string. + + The section can contain subsections, which again are dicts of + string keys and section values (the dict can be empty). Therefore, the model + card representation forms a tree structure, making use of the fact that dict + order is preserved. + + The section may also contain a ``visible`` flag, which determines if the + section will be shown when the card is rendered. + + """ + + def format(self): + return self.content + + +@dataclass +class PlotSection(Section): + """Adds a link to a figure to the model card""" + + path: str | Path = "" + alt_text: str = "" + folded: bool = False + + def __post_init__(self) -> None: + if not self.path: + raise TypeError(f"{self.__class__.__name__} requires a path") + + def format(self) -> str: + # if no alt text provided, fall back to figure path + alt_text = self.alt_text or self.path + text = f"![{alt_text}]({self.path})" + val = wrap_as_details(text, folded=self.folded) + if self.content: + val = f"{self.content}\n\n{val}" + return val + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.path})" + + +@dataclass +class TableSection(Section): + """Adds a table to the model card""" + + table: Mapping[str, Sequence[Any]] = field(default_factory=dict) + folded: bool = False + + def __post_init__(self) -> None: + self._check_table() + + def _check_table(self) -> None: + try: + import pandas as pd + + self._is_pandas_df = isinstance(self.table, pd.DataFrame) + except ImportError: + self._is_pandas_df = False + + if self._is_pandas_df: + ncols = len(self.table.columns) # type: ignore + else: + ncols = len(self.table) + if ncols == 0: + raise ValueError("Trying to add table with no columns") -class Formattable(Protocol): def format(self) -> str: - ... # pragma: no cover + if self._is_pandas_df: + headers = self.table.columns # type: ignore + else: + headers = self.table.keys() + + table = _clean_table( + tabulate(self.table, tablefmt="github", headers=headers, showindex=False) + ) + val = wrap_as_details(table, folded=self.folded) + + if self.content: + val = f"{self.content}\n\n{val}" + return val + + def __repr__(self) -> str: + if self._is_pandas_df: + nrows, ncols = self.table.shape # type: ignore + else: + # table cannot be empty, so no checks needed here + ncols = len(self.table) + key = next(iter(self.table.keys())) + nrows = len(self.table[key]) + return f"{self.__class__.__name__}({nrows}x{ncols})" def _load_model(model: Any, trusted=False) -> Any: @@ -443,10 +476,10 @@ class Card: model=LogisticRegression(random_state=0, solver='liblinear'), metadata.license=mit, Model description=This is the best model, - Model description/Training Procedure/... | | warm_start | False | , + Model description/Training Procedure/Hyperparameters=TableSection(15x2), Model description/Training Procedure/..., - Model description/Evaluation Results=...ccuracy | 0.96 | | f1 score | 0.96 |, - Model description/Confusion Matrix=...confusion_matrix.png'), + Model description/Evaluation Results=TableSection(2x2), + Model description/Confusion Matrix=Pl.../confusion_matrix.png), Model description/Model name=This model is called Bob, A new section=Please rate my model, ) @@ -513,7 +546,7 @@ def get_model(self) -> Any: # model has changed, but at the moment we have no way of knowing that return model - def add(self, **kwargs: str | Formattable) -> Self: + def add(self, **kwargs: str) -> Self: """Add new section(s) to the model card. Add one or multiple sections to the model card. The section names are @@ -685,7 +718,7 @@ def delete(self, key: str | Sequence[str]) -> None: parent_section = self._select(subsection_names, create=False) del parent_section[leaf_node_name] - def _add_single(self, key: str, val: Formattable | str) -> Section: + def _add_single(self, key: str, val: str | Section) -> Section: """Add a single section. If the (sub)section does not exist, it is created. Otherwise, the @@ -696,8 +729,8 @@ def _add_single(self, key: str, val: Formattable | str) -> Section: key: str The name of the (sub)section. - val: str or Formattable - The value to assign to the (sub)section. + val: Section + TODO Returns ------- @@ -708,13 +741,24 @@ def _add_single(self, key: str, val: Formattable | str) -> Section: *subsection_names, leaf_node_name = split_subsection_names(key) section = self._select(subsection_names) - if leaf_node_name in section: - # entry exists, only overwrite content - section[leaf_node_name].content = val + if isinstance(val, str): + # val is a str, create a PlainSection + new_section: Section = PlainSection(title=leaf_node_name, content=val) else: - # entry does not exist, create a new one - section[leaf_node_name] = Section(title=leaf_node_name, content=val) + # val is already a section and can be used as is + new_section = val + + if leaf_node_name in section: + # entry exists, preserve its subsections + old_section = section[leaf_node_name] + if ( + new_section.subsections + and new_section.subsections != old_section.subsections + ): + raise ValueError("Uh oh") # FIXME + new_section.subsections = old_section.subsections + section[leaf_node_name] = new_section return section[leaf_node_name] def add_model_plot( @@ -764,12 +808,14 @@ def add_model_plot( if self.template == Templates.skops.value: description = "The model plot is below." - self._add_model_plot(self.get_model(), section=section, description=description) + self._add_model_plot( + self.get_model(), section_name=section, description=description + ) return self def _add_model_plot( - self, model: Any, section: str, description: str | None + self, model: Any, section_name: str, description: str | None ) -> None: """Add model plot section @@ -787,7 +833,10 @@ def _add_model_plot( else: content = model_plot_div - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = PlainSection(title=title, content=content) + self._add_single(section_name, section) def add_hyperparams( self, section: str | None = None, description: str | None = None @@ -826,42 +875,27 @@ def add_hyperparams( description = "The model is trained with below hyperparameters." self._add_hyperparams( - self.get_model(), section=section, description=description + self.get_model(), section_name=section, description=description ) return self def _add_hyperparams( - self, model: Any, section: str, description: str | None + self, model: Any, section_name: str, description: str | None ) -> None: """Add hyperparameter section. The model should be a loaded sklearn model, not a path. """ - hyperparameter_dict = model.get_params(deep=True) - table = _clean_table( - tabulate( - list(hyperparameter_dict.items()), - headers=["Hyperparameter", "Value"], - tablefmt="github", - ) - ) - table_folded = textwrap.dedent( - """ -
- Click to expand - - {} - -
""" - ).format(table) + params = model.get_params(deep=True) + table = {"Hyperparameter": list(params.keys()), "Value": list(params.values())} - if description: - content = f"{description}\n{table_folded}" - else: - content = table_folded - - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = TableSection( + title=title, content=description, table=table, folded=True + ) + self._add_single(section_name, section) def add_get_started_code( self, @@ -947,7 +981,7 @@ def add_get_started_code( def _add_get_started_code( self, - section: str, + section_name: str, file_name: str, model_format: Literal["pickle", "skops"], description: str | None, @@ -965,9 +999,18 @@ def _add_get_started_code( else: content = code - self._add_single(section, content) + title = split_subsection_names(section_name)[-1] + section = PlainSection(title=title, content=content) + self._add_single(section_name, section) - def add_plot(self, *, folded=False, **kwargs: str) -> Self: + def add_plot( + self, + *, + description: str | None = None, + alt_text: str | None = None, + folded=False, + **kwargs: str, + ) -> Self: """Add plots to the model card. The plot should be saved on the file system and the path passed as @@ -975,6 +1018,10 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self: Parameters ---------- + description: TODO + + alt_text: TODO + folded: bool (default=False) If set to ``True``, the plot will be enclosed in a ``details`` tag. That means the content is folded by default and users have to click @@ -994,14 +1041,26 @@ def add_plot(self, *, folded=False, **kwargs: str) -> Self: Card object. """ + description = description or "" for section_name, plot_path in kwargs.items(): - plot_name = split_subsection_names(section_name)[-1] - section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded) + title = split_subsection_names(section_name)[-1] + alt_text = alt_text or title # TODO write test + section = PlotSection( + title=title, + content=description, + alt_text=alt_text, + path=plot_path, + folded=folded, + ) self._add_single(section_name, section) return self def add_table( - self, *, folded: bool = False, **kwargs: dict["str", list[Any]] + self, + *, + description: str | None = None, + folded: bool = False, + **kwargs: dict["str", list[Any]], ) -> Self: """Add a table to the model card. @@ -1027,6 +1086,8 @@ def add_table( Parameters ---------- + description: TODO + folded: bool (default=False) If set to ``True``, the table will be enclosed in a ``details`` tag. That means the content is folded by default and users have to click @@ -1047,8 +1108,11 @@ def add_table( Card object. """ + description = description or "" for key, val in kwargs.items(): - section = TableSection(table=val, folded=folded) + section = TableSection( + title=key, content=description, table=val, folded=folded + ) self._add_single(key, section) return self @@ -1098,7 +1162,7 @@ def add_metrics( "the evaluation results." ) self._metrics.update(kwargs) - self._add_metrics(section, self._metrics, description=description) + self._add_metrics(section, description=description, metrics=self._metrics) return self def add_permutation_importances( @@ -1157,25 +1221,25 @@ def add_permutation_importances( def _add_metrics( self, - section: str, - metrics: dict[str, str | float | int], + section_name: str, description: str | None, + metrics: dict[str, str | float | int], ) -> None: """Add metrics to the Evaluation Results section.""" if self._metrics: - data_transposed = zip(*self._metrics.items()) # make column oriented - inp = {key: val for key, val in zip(["Metric", "Value"], data_transposed)} - table = TableSection(inp).format() + # transpose from row oriented to column oriented + data_transposed = zip(*self._metrics.items()) + table = { + key: list(val) for key, val in zip(["Metric", "Value"], data_transposed) + } else: # create empty table - table = TableSection({"Metric": [], "Value": []}).format() + table = {"Metric": [], "Value": []} - if description: - content = f"{description}\n\n{table}" - else: - content = table - - self._add_single(section, content) + description = description or "" + title = split_subsection_names(section_name)[-1] + section = TableSection(title=title, content=description, table=table) + self._add_single(section_name, section) def _generate_metadata(self, metadata: ModelCardData) -> Iterator[str]: """Yield metadata in yaml format""" @@ -1192,24 +1256,21 @@ def _generate_content( content. """ - for val in data.values(): - if not val.visible: + for section in data.values(): + if not section.visible: continue - title = f"{depth * '#'} {val.title}" + title = f"{depth * '#'} {section.title}" yield title - if isinstance(val.content, str): - yield val.content - else: # is a Formattable - yield val.content.format() + yield section.format() - if val.subsections: - yield from self._generate_content(val.subsections, depth=depth + 1) + if section.subsections: + yield from self._generate_content(section.subsections, depth=depth + 1) def _iterate_content( self, data: dict[str, Section], parent_section: str = "" - ) -> Iterator[tuple[str, Formattable | str]]: + ) -> Iterator[tuple[str, Section]]: """Yield tuples of title and (non-formatted) content.""" for val in data.values(): if parent_section: @@ -1217,7 +1278,7 @@ def _iterate_content( else: title = val.title - yield title, val.content + yield title, val if val.subsections: yield from self._iterate_content(val.subsections, parent_section=title) @@ -1252,15 +1313,14 @@ def __repr__(self) -> str: # repr for contents content_reprs = [] - for title, content in self._iterate_content(self._data): + for title, section in self._iterate_content(self._data): + content = section.format() if not content: continue - if isinstance(content, str) and content.rstrip("`").rstrip().endswith( - CONTENT_PLACEHOLDER - ): + if content.rstrip("`").rstrip().endswith(CONTENT_PLACEHOLDER): # if content is just some default text, no need to show it continue - content_reprs.append(self._format_repr(f"{title}={content},")) + content_reprs.append(self._format_repr(f"{title}={section},")) content_repr = "\n".join(content_reprs) # combine all parts diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 9ad94277..e550b465 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -8,7 +8,7 @@ import numpy as np import pytest import sklearn -from huggingface_hub import CardData, metadata_load +from huggingface_hub import ModelCardData, metadata_load from sklearn.datasets import load_iris from sklearn.inspection import permutation_importance from sklearn.linear_model import LinearRegression, LogisticRegression @@ -263,7 +263,7 @@ def expected(self): def test_default(self, model_card, expected): result = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() # remove multiple whitespaces and dashes, as they're not important and may # differ depending on OS @@ -273,7 +273,7 @@ def test_default(self, model_card, expected): def test_other_section(self, model_card, expected): model_card.add_hyperparams(section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() # remove multiple whitespaces and dashes, as they're not important and may # differ depending on OS @@ -285,7 +285,7 @@ def test_other_description(self, model_card, expected): model_card.add_hyperparams(description="Awesome hyperparams") result = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() assert result.startswith("Awesome hyperparams") @pytest.mark.parametrize("template", CUSTOM_TEMPLATES) @@ -305,9 +305,9 @@ def test_add_twice(self, model_card): # of sense text1 = model_card.select( "Model description/Training Procedure/Hyperparameters" - ).content + ).format() model_card.add_hyperparams(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 @@ -321,7 +321,7 @@ def get_params(self, deep=False): model_card = Card(EstimatorWithLbInParams()) section_name = "Model description/Training Procedure/Hyperparameters" - text_hyperparams = model_card.select(section_name).content + text_hyperparams = model_card.select(section_name).format() # remove multiple whitespaces, as they're not important text_cleaned = _strip_multiple_chars(text_hyperparams, " ") @@ -333,13 +333,13 @@ class TestAddMetrics: def test_default(self, model_card): # by default, don't add a table, as there are no metrics - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = "[More Information Needed]" assert result == expected def test_empty_metrics_table(self, model_card): model_card.add_metrics() - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -354,7 +354,7 @@ def test_multiple_metrics(self, model_card): f1=0.1, # float awesomeness=123, # int ) - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -368,7 +368,7 @@ def test_multiple_metrics(self, model_card): def test_other_section(self, model_card): model_card.add_metrics(accuracy=0.9, section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() expected = ( "You can find the details about evaluation process and the evaluation " "results.\n\n" @@ -380,7 +380,7 @@ def test_other_section(self, model_card): def test_other_description(self, model_card): model_card.add_metrics(accuracy=0.9, description="Awesome metrics") - result = model_card.select("Model description/Evaluation Results").content + result = model_card.select("Model description/Evaluation Results").format() assert result.startswith("Awesome metrics") @pytest.mark.parametrize("template", CUSTOM_TEMPLATES) @@ -398,9 +398,9 @@ def test_add_twice(self, model_card): # it's possible to add the section twice, even if it doesn't make a lot # of sense model_card.add_metrics(accuracy=0.9) - text1 = model_card.select("Model description/Evaluation Results").content + text1 = model_card.select("Model description/Evaluation Results").format() model_card.add_metrics(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 @@ -539,7 +539,7 @@ def model_card_skops(self, metadata_skops): def test_default_pickle(self, model_card): # by default, don't add a table, as there are no metrics - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -556,7 +556,7 @@ def test_default_pickle(self, model_card): def test_default_skops(self, model_card_skops): # by default, don't add a table, as there are no metrics - result = model_card_skops.select("How to Get Started with the Model").content + result = model_card_skops.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -590,18 +590,18 @@ def to_dict(self): def test_other_section(self, model_card): model_card.add_get_started_code(section="Other section") - result = model_card.select("Other section").content + result = model_card.select("Other section").format() expected = "Use the code below to get started with the model." assert result.startswith(expected) def test_other_description(self, model_card): model_card.add_get_started_code(description="Awesome code") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() assert result.startswith("Awesome code") def test_other_filename(self, model_card): model_card.add_get_started_code(file_name="foobar.pkl") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -618,7 +618,7 @@ def test_other_filename(self, model_card): def test_other_model_format(self, model_card): model_card.add_get_started_code(model_format="skops") - result = model_card.select("How to Get Started with the Model").content + result = model_card.select("How to Get Started with the Model").format() expected = ( "Use the code below to get started with the model.\n\n" "```python\n" @@ -670,9 +670,9 @@ def test_custom_template_no_section_raises(self, template): def test_add_twice(self, model_card): # it's possible to add the section twice, even if it doesn't make a lot # of sense - text1 = model_card.select("How to Get Started with the Model").content + text1 = model_card.select("How to Get Started with the Model").format() model_card.add_get_started_code(section="Other section") - text2 = model_card.select("Other section").content + text2 = model_card.select("Other section").format() assert text1 == text2 @@ -952,7 +952,7 @@ def test_add_plot(self, destination_path, model_card): plt.plot([4, 5, 6, 7]) plt.savefig(Path(destination_path) / "fig1.png") model_card = model_card.add_plot(fig1="fig1.png") - plot_content = model_card.select("fig1").content.format() + plot_content = model_card.select("fig1").format() assert plot_content == "![fig1](fig1.png)" def test_add_plot_to_existing_section(self, destination_path, model_card): @@ -961,7 +961,7 @@ def test_add_plot_to_existing_section(self, destination_path, model_card): plt.plot([4, 5, 6, 7]) plt.savefig(Path(destination_path) / "fig1.png") model_card = model_card.add_plot(**{"Model description/Figure 1": "fig1.png"}) - plot_content = model_card.select("Model description/Figure 1").content.format() + plot_content = model_card.select("Model description/Figure 1").format() assert plot_content == "![Figure 1](fig1.png)" @@ -1129,34 +1129,17 @@ def expected_lines(self): card_repr = """ Card( model=LinearRegression(fit_intercept=False), - Model description/Training Procedure/...ed | | positive | False | , + Model description/Training Procedure/Hyperparameters=TableSection(4x2), Model description/Training Procedure/..., Model Card Authors=Jane Doe, - Figures/ROC='ROC.png', - Figures/Confusion matrix='confusion_matrix.jpg', + Figures/ROC=PlotSection(ROC.png), + Figures/Confusion matrix=PlotSection(confusion_matrix.jpg), Model Description=A description, - Search Results=Table(3x2), + Search Results=TableSection(3x2), ) """ expected = textwrap.dedent(card_repr).strip() lines = expected.split("\n") - - # TODO: remove when dropping sklearn v0.24 and when dropping v1.1 and - # below. This is because the "normalize" parameter was changed after - # v0.24 will be removed completely in sklearn v1.2. - major, minor, *_ = sklearn.__version__.split(".") - if int(major) < 1: - # v0.24: "deprecated" -> "False" - lines[2] = ( - " Model description/Training Procedure/...se | | positive | False | " - "," - ) - elif int(minor) >= 2: - # >= v1.2: remove argument completely - lines[2] = ( - " Model description/Training Procedure/... | | | positive | False | " - "," - ) return lines @pytest.mark.parametrize("meth", [repr, str]) @@ -1208,7 +1191,7 @@ def test_without_model_attribute(self, card: Card, meth, expected_lines): @pytest.mark.parametrize("meth", [repr, str]) def test_with_metadata(self, card: Card, meth, expected_lines): - metadata = CardData( + metadata = ModelCardData( language="fr", license="bsd", library_name="sklearn", @@ -1299,30 +1282,47 @@ def test_load_model_file_not_found(self, meth): class TestPlotSection: def test_format_path_is_str(self): - section = PlotSection(alt_text="some title", path="path/plot.png") + title, description = "", "" + section = PlotSection( + title, description, alt_text="some title", path="path/plot.png" + ) expected = "![some title](path/plot.png)" assert section.format() == expected def test_format_path_is_pathlib(self): - section = PlotSection(alt_text="some title", path=Path("path") / "plot.png") + title, description = "", "" + section = PlotSection( + title, description, alt_text="some title", path=Path("path") / "plot.png" + ) expected = f"![some title](path{os.path.sep}plot.png)" assert section.format() == expected @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr(self, meth): - section = PlotSection(alt_text="some title", path="path/plot.png") - expected = "'path/plot.png'" + title, description = "", "" + section = PlotSection( + title, description, alt_text="some title", path="path/plot.png" + ) + expected = "PlotSection(path/plot.png)" assert meth(section) == expected def test_str(self): - section = PlotSection(alt_text="some title", path="path/plot.png") - expected = "'path/plot.png'" + title, description = "", "" + section = PlotSection( + title, description, alt_text="some title", path="path/plot.png" + ) + expected = "PlotSection(path/plot.png)" assert str(section) == expected @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, folded): + title, description = "", "" section = PlotSection( - alt_text="some title", path="path/plot.png", folded=folded + title, + description, + alt_text="some title", + path="path/plot.png", + folded=folded, ) output = section.format() if folded: @@ -1330,6 +1330,10 @@ def test_folded(self, folded): else: assert "
" not in output + def test_add_with_description(self): + # FIXME + pass + class TestTableSection: @pytest.fixture @@ -1337,7 +1341,8 @@ def table_dict(self): return {"split": [1, 2, 3], "score": [4, 5, 6]} def test_table_is_dict(self, table_dict): - section = TableSection(table=table_dict) + title, description = "", "" + section = TableSection(title, description, table=table_dict) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1348,7 +1353,8 @@ def test_table_is_dict(self, table_dict): def test_table_is_dataframe(self, table_dict): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - section = TableSection(table=df) + title, description = "", "" + section = TableSection(title, description, table=df) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1358,16 +1364,18 @@ def test_table_is_dataframe(self, table_dict): @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr_table_is_dict(self, table_dict, meth): - section = TableSection(table=table_dict) - expected = "Table(3x2)" + title, description = "", "" + section = TableSection(title, description, table=table_dict) + expected = "TableSection(3x2)" assert meth(section) == expected @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr_table_is_dataframe(self, table_dict, meth): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - section = TableSection(table=df) - expected = "Table(3x2)" + title, description = "", "" + section = TableSection(title, description, table=df) + expected = "TableSection(3x2)" assert meth(section) == expected @pytest.mark.parametrize("table", [{}, "pandas"]) @@ -1377,9 +1385,10 @@ def test_raise_error_empty_table(self, table): pd = pytest.importorskip("pandas") table = pd.DataFrame([]) + title, description = "", "" msg = "Trying to add table with no columns" with pytest.raises(ValueError, match=msg): - TableSection(table=table) + TableSection(title, description, table=table) @pytest.mark.parametrize("table", [{"col0": []}, "pandas"]) def test_table_with_no_rows_works(self, table): @@ -1388,17 +1397,20 @@ def test_table_with_no_rows_works(self, table): pd = pytest.importorskip("pandas") table = pd.DataFrame(data=[], columns=["col0"]) - TableSection(table=table).format() # no error raised + title, description = "", "" + TableSection(title, description, table=table).format() # no error raised def test_pandas_not_installed(self, table_dict, pandas_not_installed): # use pandas_not_installed fixture from conftest.py to pretend that # pandas is not installed - section = TableSection(table=table_dict) + title, description = "", "" + section = TableSection(title, description, table=table_dict) assert section._is_pandas_df is False @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, table_dict, folded): - section = TableSection(table=table_dict, folded=folded) + title, description = "", "" + section = TableSection(title, description, table=table_dict, folded=folded) output = section.format() if folded: assert "
" in output @@ -1429,7 +1441,8 @@ def __repr__(self) -> str: line breaks """, ] - section = TableSection(table=table_dict) + title, description = "", "" + section = TableSection(title, description, table=table_dict) expected = """| split | score | with break | |-|-|-| | 1 | 4 | obj
with lb | @@ -1442,6 +1455,10 @@ def __repr__(self) -> str: result = _strip_multiple_chars(result, "-") assert result == expected + def test_add_table_with_description(self): + # FIXME + pass + class TestCustomTemplate: @pytest.fixture @@ -1461,17 +1478,17 @@ def card(self, template): def test_add_model_plot(self, card): card.add_model_plot(section="Model/Model plot") - content = card.select("Model/Model plot").content + content = card.select("Model/Model plot").format() assert "LinearRegression" in content def test_add_hyperparams(self, card): card.add_hyperparams(section="Model/Hyperparams") - content = card.select("Model/Hyperparams").content + content = card.select("Model/Hyperparams").format() assert "fit_intercept" in content def test_add_metrics(self, card): card.add_metrics(accuracy=0.1, section="Model/Metrics") - content = card.select("Model/Metrics").content + content = card.select("Model/Metrics").format() assert "accuracy" in content assert "0.1" in content From d2585a03e6b908a1daca64551bdb80ddea6b3789 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Thu, 2 Mar 2023 17:22:17 +0100 Subject: [PATCH 2/7] Add missing tests and docstrings --- skops/card/_model_card.py | 41 +++++++++++---- skops/card/tests/test_card.py | 98 +++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 14 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index e996adad..c67edded 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -729,8 +729,10 @@ def _add_single(self, key: str, val: str | Section) -> Section: key: str The name of the (sub)section. - val: Section - TODO + val: str or Section + The value to assign to the (sub)section. If this is already a + section, leave it as it is. If it's a string, create a + :class:`skops.card._model_card.PlainSection`. Returns ------- @@ -751,11 +753,14 @@ def _add_single(self, key: str, val: str | Section) -> Section: if leaf_node_name in section: # entry exists, preserve its subsections old_section = section[leaf_node_name] - if ( - new_section.subsections - and new_section.subsections != old_section.subsections + if new_section.subsections and ( + new_section.subsections != old_section.subsections ): - raise ValueError("Uh oh") # FIXME + msg = ( + f"Trying to override section '{leaf_node_name}' but found " + "conflicting subsections." + ) + raise ValueError(msg) new_section.subsections = old_section.subsections section[leaf_node_name] = new_section @@ -1018,9 +1023,19 @@ def add_plot( Parameters ---------- - description: TODO - - alt_text: TODO + description: str or None (default=None) + If a string is passed as description, it is shown before the figure. + If multiple figures are added with one call, they all get the same + description. To add multiple figures with different descriptions, + call this method multiple times. + + alt_text: : str or None (default=None) + If a string is passed as ``alt_text``, it is used as the alternative + text for the figure (i.e. what is shown if the figure cannot be + rendered). If this argument is ``None``, the alt_text will just be + the same as the section title. If multiple figures are added with + one call, they all get the same alt text. To add multiple figures + with different alt texts, call this method multiple times. folded: bool (default=False) If set to ``True``, the plot will be enclosed in a ``details`` tag. @@ -1044,7 +1059,7 @@ def add_plot( description = description or "" for section_name, plot_path in kwargs.items(): title = split_subsection_names(section_name)[-1] - alt_text = alt_text or title # TODO write test + alt_text = alt_text or title section = PlotSection( title=title, content=description, @@ -1086,7 +1101,11 @@ def add_table( Parameters ---------- - description: TODO + description: str or None (default=None) + If a string is passed as description, it is shown before the table. + If multiple tables are added with one call, they all get the same + description. To add multiple tables with different descriptions, + call this method multiple times. folded: bool (default=False) If set to ``True``, the table will be enclosed in a ``details`` tag. diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index e550b465..1a012c59 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -19,6 +19,7 @@ from skops.card import Card, metadata_from_config from skops.card._model_card import ( SKOPS_TEMPLATE, + PlainSection, PlotSection, TableSection, _load_model, @@ -863,6 +864,70 @@ def test_add_content_to_existing_section(self, model_card): assert num_subsection_before == num_subsection_after assert section.content == "sklearn FTW" + def test_add_plain_section_works(self, model_card): + # It is allowed to add a *Section object, but it's not documented and + # users should normally not use that feature + section = PlainSection("title may differ from section name", "some content") + model_card.add( + a_string="normal string", + a_section=section, + ) + assert model_card.select("a_section") == section + + def test_add_section_preserves_subsections(self, model_card): + # As explained in the previous test, users can theoretically add section + # instances. If they override an existing section with a new section, + # the subsections of the existing section should be preserved. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + assert model_card.select("new section").format() == "hello" + assert model_card.select("new section/subsection").format() == "world" + + # now let's override the section, the subsection should be preserved + new_section = PlainSection("new section", "bonjour") + model_card.add(**{"new section": new_section}) + assert model_card.select("new section").format() == "bonjour" + assert model_card.select("new section/subsection").format() == "world" + + def test_add_section_with_identical_subsection_preserves_subsections( + self, model_card + ): + # As explained in the previous tests, users can theoretically add + # section instances. If they override an existing section with a new + # section, the subsections of the existing section should be preserved. + # If the new section they add has its own subsections, and these + # subsections are identical to the old subsections, that should be fine. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + + # now let's override the section using the same subsections + old_subsection = model_card.select("new section").subsections + new_section = PlainSection("new section", "bonjour", subsections=old_subsection) + model_card.add(**{"new section": new_section}) + assert model_card.select("new section").format() == "bonjour" + assert model_card.select("new section/subsection").format() == "world" + + def test_add_section_with_different_subsection_raises(self, model_card): + # This is the same as the previous test, but now the section used to + # override the previous section has different subsections. Now we don't + # know what to do and should raise. This is okay because normally, a + # user shouldn't add section instances anyway. + + # first let's add a section and a subsection + model_card.add(**{"new section": "hello", "new section/subsection": "world"}) + + # now let's override the section using different subsections + new_subsection = {"new subsection": PlainSection("subsection", "mars")} + new_section = PlainSection("new section", "bonjour", subsections=new_subsection) + + match = ( + "Trying to override section 'new section' but found conflicting subsections" + ) + with pytest.raises(ValueError, match=match): + model_card.add(**{"new section": new_section}) + class TestDelete: """Deleting sections and subsections""" @@ -964,6 +1029,24 @@ def test_add_plot_to_existing_section(self, destination_path, model_card): plot_content = model_card.select("Model description/Figure 1").format() assert plot_content == "![Figure 1](fig1.png)" + def test_add_plot_with_description(self, destination_path, model_card): + import matplotlib.pyplot as plt + + plt.plot([4, 5, 6, 7]) + plt.savefig(Path(destination_path) / "fig1.png") + model_card = model_card.add_plot(description="My fancy plot", fig1="fig1.png") + plot_content = model_card.select("fig1").format() + assert plot_content == "My fancy plot\n\n![fig1](fig1.png)" + + def test_add_plot_with_alt_text(self, destination_path, model_card): + import matplotlib.pyplot as plt + + plt.plot([4, 5, 6, 7]) + plt.savefig(Path(destination_path) / "fig1.png") + model_card = model_card.add_plot(alt_text="the figure", fig1="fig1.png") + plot_content = model_card.select("fig1").format() + assert plot_content == "![the figure](fig1.png)" + class TestMetadata: def test_adding_metadata(self, model_card): @@ -1455,9 +1538,18 @@ def __repr__(self) -> str: result = _strip_multiple_chars(result, "-") assert result == expected - def test_add_table_with_description(self): - # FIXME - pass + def test_add_table_with_description(self, model_card, table_dict): + model_card.add_table(description="My fancy table", **{"The table": table_dict}) + section = model_card.select("The table") + content = section.format() + expected = """My fancy table + +| split | score | +|---------|---------| +| 1 | 4 | +| 2 | 5 | +| 3 | 6 |""" + assert content == expected class TestCustomTemplate: From 0669b28ef9cea5a3c6c4fef2b1c58625edf0c78d Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Thu, 2 Mar 2023 17:46:40 +0100 Subject: [PATCH 3/7] Fix repr, Windows does not use / --- skops/card/_model_card.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index c67edded..d71d8100 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -479,7 +479,7 @@ class Card: Model description/Training Procedure/Hyperparameters=TableSection(15x2), Model description/Training Procedure/..., Model description/Evaluation Results=TableSection(2x2), - Model description/Confusion Matrix=Pl.../confusion_matrix.png), + Model description/Confusion Matrix=Pl...confusion_matrix.png), Model description/Model name=This model is called Bob, A new section=Please rate my model, ) From 833aced5a7e82819885c84859839df1721a07c88 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 3 Mar 2023 12:00:04 +0100 Subject: [PATCH 4/7] Add description param to permutation importances The add_permutation_importances method now also has a description argument. I also reworked the tests for permutation importances a bit: - added a new test for description - put all related tests inside a test class - moved the permutation importance computation into a fixture - moved to the new testing method of selecting section and checking its content exactly instead of checking a substring in model_card.render() --- skops/card/_model_card.py | 6 +- skops/card/tests/test_card.py | 177 +++++++++++++++++++--------------- 2 files changed, 105 insertions(+), 78 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index d71d8100..94babd15 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -1191,6 +1191,7 @@ def add_permutation_importances( plot_file: str = "permutation_importances.png", plot_name: str = "Permutation Importances", overwrite: bool = False, + description: str | None = None, ) -> Self: """Plots permutation importance and saves it to model card. @@ -1212,6 +1213,9 @@ def add_permutation_importances( Whether to overwrite the permutation importance plot file, if a plot by that name already exists. + description : str | None (default=None) + An optional description to be added before the plot. + Returns ------- self : object @@ -1234,7 +1238,7 @@ def add_permutation_importances( ax.set_title(plot_name) ax.set_xlabel("Decrease in Score") plt.savefig(plot_file) - self.add_plot(**{plot_name: plot_file}) + self.add_plot(description=description, **{plot_name: plot_file}) return self diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 1a012c59..3f18fb38 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -405,94 +405,117 @@ def test_add_twice(self, model_card): assert text1 == text2 -def test_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) +class TestAddPermutationImportance: + @pytest.fixture + def importances(self, iris_estimator, iris_data): + X, y = iris_data + result = permutation_importance( + iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 + ) + return result - model_card.add_permutation_importances( - result, - X.columns, - Path(destination_path) / "importance.png", - "Permutation Importance", - ) - temp_path = Path(destination_path) / "importance.png" - assert f"![Permutation Importance]({temp_path}" in model_card.render() + def test_permutation_importances( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=Path(destination_path) / "importance.png", + plot_name="Permutation Importance", + ) + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importance") + expected = f"![Permutation Importance]({temp_path})" + assert section.format() == expected + def test_multiple_permutation_importances( + self, iris_data, iris_estimator, importances, model_card, destination_path + ): + X, y = iris_data + model_card.add_permutation_importances( + importances, X.columns, plot_file=Path(destination_path) / "importance.png" + ) -def test_multiple_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - model_card.add_permutation_importances( - result, X.columns, plot_file=Path(destination_path) / "importance.png" - ) - f1 = make_scorer(f1_score, average="micro") - result = permutation_importance( - iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2 - ) - model_card.add_permutation_importances( - result, - X.columns, - plot_file=Path(destination_path) / "f1_importance.png", - plot_name="Permutation Importance on f1", - ) - # check for default one - temp_path = Path(destination_path) / "importance.png" - assert f"![Permutation Importances]({temp_path}" in model_card.render() - # check for F1 - temp_path_f1 = Path(destination_path) / "f1_importance.png" - assert f"![Permutation Importance on f1]({temp_path_f1}" in model_card.render() + f1 = make_scorer(f1_score, average="micro") + importances_f1 = permutation_importance( + iris_estimator, X, y, scoring=f1, n_repeats=10, random_state=42, n_jobs=2 + ) + model_card.add_permutation_importances( + importances_f1, + columns=X.columns, + plot_file=Path(destination_path) / "f1_importance.png", + plot_name="Permutation Importance on f1", + ) + # check for default one + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importances") + expected = f"![Permutation Importances]({temp_path})" + assert section.format() == expected -def test_duplicate_permutation_importances( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - plot_path = os.path.join(destination_path, "importance.png") - model_card.add_permutation_importances(result, X.columns, plot_file=plot_path) - with pytest.raises( - ValueError, - match=( - "already exists. Set `overwrite` to `True` or pass a" - " different filename for the plot." - ), + # check for F1 + temp_path_f1 = Path(destination_path) / "f1_importance.png" + section = model_card.select("Permutation Importance on f1") + expected = f"![Permutation Importance on f1]({temp_path_f1})" + assert section.format() == expected + + def test_duplicate_permutation_importances( + self, iris_data, importances, model_card, destination_path ): + X, _ = iris_data + plot_path = os.path.join(destination_path, "importance.png") model_card.add_permutation_importances( - result, - X.columns, + importances, X.columns, plot_file=plot_path + ) + with pytest.raises( + ValueError, + match=( + "already exists. Set `overwrite` to `True` or pass a" + " different filename for the plot." + ), + ): + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=plot_path, + plot_name="Permutation Importance on f1", + ) + + def test_duplicate_permutation_importances_overwrite( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + plot_path = os.path.join(destination_path, "importance.png") + model_card.add_permutation_importances( + importances, X.columns, plot_file=plot_path + ) + + model_card.add_permutation_importances( + importances, + columns=X.columns, plot_file=plot_path, plot_name="Permutation Importance on f1", + overwrite=True, ) + section = model_card.select("Permutation Importance on f1") + expected = f"![Permutation Importance on f1]({plot_path})" + assert section.format() == expected - -def test_duplicate_permutation_importances_overwrite( - iris_estimator, iris_data, model_card, destination_path -): - X, y = iris_data - result = permutation_importance( - iris_estimator, X, y, n_repeats=10, random_state=42, n_jobs=2 - ) - plot_path = os.path.join(destination_path, "importance.png") - model_card.add_permutation_importances(result, X.columns, plot_file=plot_path) - - model_card.add_permutation_importances( - result, - X.columns, - plot_file=plot_path, - plot_name="Permutation Importance on f1", - overwrite=True, - ) - assert f"![Permutation Importance on f1]({plot_path}" in model_card.render() + def test_permutation_importances_with_description( + self, iris_data, importances, model_card, destination_path + ): + X, _ = iris_data + model_card.add_permutation_importances( + importances, + columns=X.columns, + plot_file=Path(destination_path) / "importance.png", + description="Very important", + ) + temp_path = Path(destination_path) / "importance.png" + section = model_card.select("Permutation Importances") + expected = f"Very important\n\n![Permutation Importances]({temp_path})" + assert section.format() == expected class TestAddGetStartedCode: From 87547df411974912182b24073661f3e277e07dd9 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 3 Mar 2023 12:03:05 +0100 Subject: [PATCH 5/7] Remove Section abstract class Now, Section is a concrete class (taking the role of PlainSection). --- skops/card/_model_card.py | 55 +++++++++++++---------------------- skops/card/tests/test_card.py | 12 ++++---- 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 94babd15..1d2ad0fd 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -181,9 +181,22 @@ def _getting_started_code( @dataclass class Section: - """Base (data)class for all types of sections. + """Building block of the model card. + + The model card is represented internally as a dict with keys being strings + and values being ``Section``s. The key is identical to the section title. + + Additionally, the section may hold content in the form of strings (can be an + empty string) or a ``Formattable``, which is simply an object with a + ``format`` method that returns a string. + + The section can contain subsections, which again are dicts of + string keys and section values (the dict can be empty). Therefore, the model + card representation forms a tree structure, making use of the fact that dict + order is preserved. - Subclasses should implement at the very least the ``.format`` method. + The section may also contain a ``visible`` flag, which determines if the + section will be shown when the card is rendered. """ @@ -227,8 +240,7 @@ def select(self, key: str) -> Section: return section def format(self) -> str: - """Format determines how the content of this section will be rendered.""" - raise NotImplementedError + return self.content def __repr__(self) -> str: """repr determines how the content of this section is shown in the @@ -236,31 +248,6 @@ def __repr__(self) -> str: return self.content -@dataclass(repr=False) -class PlainSection(Section): - """Building block of the model card. - - The model card is represented internally as a dict with keys being strings - and values being Sections. The key is identical to the section title. - - Additionally, the section may hold content in the form of strings (can be an - empty string) or a ``Formattable``, which is simply an object with a - ``format`` method that returns a string. - - The section can contain subsections, which again are dicts of - string keys and section values (the dict can be empty). Therefore, the model - card representation forms a tree structure, making use of the fact that dict - order is preserved. - - The section may also contain a ``visible`` flag, which determines if the - section will be shown when the card is rendered. - - """ - - def format(self): - return self.content - - @dataclass class PlotSection(Section): """Adds a link to a figure to the model card""" @@ -732,7 +719,7 @@ def _add_single(self, key: str, val: str | Section) -> Section: val: str or Section The value to assign to the (sub)section. If this is already a section, leave it as it is. If it's a string, create a - :class:`skops.card._model_card.PlainSection`. + :class:`skops.card._model_card.Section`. Returns ------- @@ -744,8 +731,8 @@ def _add_single(self, key: str, val: str | Section) -> Section: section = self._select(subsection_names) if isinstance(val, str): - # val is a str, create a PlainSection - new_section: Section = PlainSection(title=leaf_node_name, content=val) + # val is a str, create a Section + new_section = Section(title=leaf_node_name, content=val) else: # val is already a section and can be used as is new_section = val @@ -840,7 +827,7 @@ def _add_model_plot( description = description or "" title = split_subsection_names(section_name)[-1] - section = PlainSection(title=title, content=content) + section = Section(title=title, content=content) self._add_single(section_name, section) def add_hyperparams( @@ -1005,7 +992,7 @@ def _add_get_started_code( content = code title = split_subsection_names(section_name)[-1] - section = PlainSection(title=title, content=content) + section = Section(title=title, content=content) self._add_single(section_name, section) def add_plot( diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 3f18fb38..3cf58992 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -19,8 +19,8 @@ from skops.card import Card, metadata_from_config from skops.card._model_card import ( SKOPS_TEMPLATE, - PlainSection, PlotSection, + Section, TableSection, _load_model, ) @@ -890,7 +890,7 @@ def test_add_content_to_existing_section(self, model_card): def test_add_plain_section_works(self, model_card): # It is allowed to add a *Section object, but it's not documented and # users should normally not use that feature - section = PlainSection("title may differ from section name", "some content") + section = Section("title may differ from section name", "some content") model_card.add( a_string="normal string", a_section=section, @@ -908,7 +908,7 @@ def test_add_section_preserves_subsections(self, model_card): assert model_card.select("new section/subsection").format() == "world" # now let's override the section, the subsection should be preserved - new_section = PlainSection("new section", "bonjour") + new_section = Section("new section", "bonjour") model_card.add(**{"new section": new_section}) assert model_card.select("new section").format() == "bonjour" assert model_card.select("new section/subsection").format() == "world" @@ -927,7 +927,7 @@ def test_add_section_with_identical_subsection_preserves_subsections( # now let's override the section using the same subsections old_subsection = model_card.select("new section").subsections - new_section = PlainSection("new section", "bonjour", subsections=old_subsection) + new_section = Section("new section", "bonjour", subsections=old_subsection) model_card.add(**{"new section": new_section}) assert model_card.select("new section").format() == "bonjour" assert model_card.select("new section/subsection").format() == "world" @@ -942,8 +942,8 @@ def test_add_section_with_different_subsection_raises(self, model_card): model_card.add(**{"new section": "hello", "new section/subsection": "world"}) # now let's override the section using different subsections - new_subsection = {"new subsection": PlainSection("subsection", "mars")} - new_section = PlainSection("new section", "bonjour", subsections=new_subsection) + new_subsection = {"new subsection": Section("subsection", "mars")} + new_section = Section("new section", "bonjour", subsections=new_subsection) match = ( "Trying to override section 'new section' but found conflicting subsections" From 18c085543a5e042a4ccaf0f16bab41ad14fe6373 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Fri, 3 Mar 2023 12:11:22 +0100 Subject: [PATCH 6/7] Address reviewer comments --- skops/card/_model_card.py | 7 ++++-- skops/card/tests/test_card.py | 44 ++++++++++++----------------------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 1d2ad0fd..0cb642d2 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -243,8 +243,11 @@ def format(self) -> str: return self.content def __repr__(self) -> str: - """repr determines how the content of this section is shown in the - Card's repr""" + """Generates the ``repr`` of this section. + + ``repr`` determines how the content of this section is shown in the + Card's repr. + """ return self.content diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py index 3cf58992..4b74d724 100644 --- a/skops/card/tests/test_card.py +++ b/skops/card/tests/test_card.py @@ -1388,44 +1388,39 @@ def test_load_model_file_not_found(self, meth): class TestPlotSection: def test_format_path_is_str(self): - title, description = "", "" section = PlotSection( - title, description, alt_text="some title", path="path/plot.png" + title="", content="", alt_text="some title", path="path/plot.png" ) expected = "![some title](path/plot.png)" assert section.format() == expected def test_format_path_is_pathlib(self): - title, description = "", "" section = PlotSection( - title, description, alt_text="some title", path=Path("path") / "plot.png" + title="", content="", alt_text="some title", path=Path("path") / "plot.png" ) expected = f"![some title](path{os.path.sep}plot.png)" assert section.format() == expected @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr(self, meth): - title, description = "", "" section = PlotSection( - title, description, alt_text="some title", path="path/plot.png" + title="", content="", alt_text="some title", path="path/plot.png" ) expected = "PlotSection(path/plot.png)" assert meth(section) == expected def test_str(self): - title, description = "", "" section = PlotSection( - title, description, alt_text="some title", path="path/plot.png" + title="", content="", alt_text="some title", path="path/plot.png" ) expected = "PlotSection(path/plot.png)" assert str(section) == expected @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, folded): - title, description = "", "" section = PlotSection( - title, - description, + title="", + content="", alt_text="some title", path="path/plot.png", folded=folded, @@ -1447,8 +1442,7 @@ def table_dict(self): return {"split": [1, 2, 3], "score": [4, 5, 6]} def test_table_is_dict(self, table_dict): - title, description = "", "" - section = TableSection(title, description, table=table_dict) + section = TableSection(title="", content="", table=table_dict) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1459,8 +1453,7 @@ def test_table_is_dict(self, table_dict): def test_table_is_dataframe(self, table_dict): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - title, description = "", "" - section = TableSection(title, description, table=df) + section = TableSection(title="", content="", table=df) expected = """| split | score | |---------|---------| | 1 | 4 | @@ -1470,8 +1463,7 @@ def test_table_is_dataframe(self, table_dict): @pytest.mark.parametrize("meth", [str, repr]) def test_str_and_repr_table_is_dict(self, table_dict, meth): - title, description = "", "" - section = TableSection(title, description, table=table_dict) + section = TableSection(title="", content="", table=table_dict) expected = "TableSection(3x2)" assert meth(section) == expected @@ -1479,8 +1471,7 @@ def test_str_and_repr_table_is_dict(self, table_dict, meth): def test_str_and_repr_table_is_dataframe(self, table_dict, meth): pd = pytest.importorskip("pandas") df = pd.DataFrame(table_dict) - title, description = "", "" - section = TableSection(title, description, table=df) + section = TableSection(title="", content="", table=df) expected = "TableSection(3x2)" assert meth(section) == expected @@ -1491,10 +1482,9 @@ def test_raise_error_empty_table(self, table): pd = pytest.importorskip("pandas") table = pd.DataFrame([]) - title, description = "", "" msg = "Trying to add table with no columns" with pytest.raises(ValueError, match=msg): - TableSection(title, description, table=table) + TableSection(title="", content="", table=table) @pytest.mark.parametrize("table", [{"col0": []}, "pandas"]) def test_table_with_no_rows_works(self, table): @@ -1503,20 +1493,17 @@ def test_table_with_no_rows_works(self, table): pd = pytest.importorskip("pandas") table = pd.DataFrame(data=[], columns=["col0"]) - title, description = "", "" - TableSection(title, description, table=table).format() # no error raised + TableSection(title="", content="", table=table).format() # no error raised def test_pandas_not_installed(self, table_dict, pandas_not_installed): # use pandas_not_installed fixture from conftest.py to pretend that # pandas is not installed - title, description = "", "" - section = TableSection(title, description, table=table_dict) + section = TableSection(title="", content="", table=table_dict) assert section._is_pandas_df is False @pytest.mark.parametrize("folded", [True, False]) def test_folded(self, table_dict, folded): - title, description = "", "" - section = TableSection(title, description, table=table_dict, folded=folded) + section = TableSection(title="", content="", table=table_dict, folded=folded) output = section.format() if folded: assert "
" in output @@ -1547,8 +1534,7 @@ def __repr__(self) -> str: line breaks """, ] - title, description = "", "" - section = TableSection(title, description, table=table_dict) + section = TableSection(title="", content="", table=table_dict) expected = """| split | score | with break | |-|-|-| | 1 | 4 | obj
with lb | From b66db879784c272130151f2a059d22deff831920 Mon Sep 17 00:00:00 2001 From: Benjamin Bossan Date: Mon, 6 Mar 2023 11:18:31 +0100 Subject: [PATCH 7/7] Reviewer comment: how to init class Missing from last commit. --- skops/card/_markup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skops/card/_markup.py b/skops/card/_markup.py index 59a2377d..2e42a25c 100644 --- a/skops/card/_markup.py +++ b/skops/card/_markup.py @@ -266,8 +266,7 @@ def _table(self, item) -> str: data_transposed = zip(*body) table = {key: val for key, val in zip(columns, data_transposed)} - title, description = "", "" - res = TableSection(title, description, table=table).format() + res = TableSection(title="", content="", table=table).format() return res def _parse_div(self, item) -> str: