From 8510d9e1f51c4b3ab370bddff99c744c8877c52f Mon Sep 17 00:00:00 2001
From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com>
Date: Wed, 5 Oct 2022 20:22:08 +0530
Subject: [PATCH 1/6] load model in Card class
---
skops/card/_model_card.py | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index c978e670..76647ae4 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -254,7 +254,7 @@ def __init__(
model_diagram: bool = True,
metadata: Optional[CardData] = None,
) -> None:
- self.model = model
+ self.model = self._load_model(model)
self.model_diagram = model_diagram
self._eval_results = {} # type: ignore
self._template_sections: dict[str, str] = {}
@@ -373,6 +373,26 @@ def add_metrics(self, **kwargs: str) -> "Card":
self._eval_results[metric] = value
return self
+ def _load_model(model: Any) -> Any:
+ """Loads the model if provided a file path.
+
+ Parameters
+ ----------
+ model : Any
+ Str or model instance.
+
+ Returns
+ -------
+ model : object
+ Model instance.
+ """
+ if isinstance(model, str):
+ model_path = Path(model)
+ if not model_path.exists():
+ raise ValueError("Model file does not exist")
+ model = skops.io.load(model)
+ return model
+
def _generate_card(self) -> ModelCard:
"""Generate the ModelCard object
From 4367c134a90c6c5ac04b3ac94c55074d942b694a Mon Sep 17 00:00:00 2001
From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com>
Date: Wed, 5 Oct 2022 21:23:54 +0530
Subject: [PATCH 2/6] doc failing resolution
---
skops/card/_model_card.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index 76647ae4..47ffbeba 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -373,7 +373,7 @@ def add_metrics(self, **kwargs: str) -> "Card":
self._eval_results[metric] = value
return self
- def _load_model(model: Any) -> Any:
+ def _load_model(self, model: Any) -> Any:
"""Loads the model if provided a file path.
Parameters
From 8f919a04febd96cdf4dc0f0f3b3d6744258fcf9b Mon Sep 17 00:00:00 2001
From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com>
Date: Thu, 6 Oct 2022 18:18:43 +0530
Subject: [PATCH 3/6] support for pkl model file
---
skops/card/_model_card.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index 47ffbeba..3a9e111f 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -5,6 +5,7 @@
import re
import shutil
import tempfile
+import joblib
from dataclasses import dataclass
from pathlib import Path
from reprlib import Repr
@@ -390,7 +391,12 @@ def _load_model(self, model: Any) -> Any:
model_path = Path(model)
if not model_path.exists():
raise ValueError("Model file does not exist")
- model = skops.io.load(model)
+ if model_path.suffix==".pkl":
+ model = joblib.load(model_path)
+ elif model_path.suffix==".skops":
+ model = skops.io.load(model_path)
+ else:
+ raise ValueError("Model Format not supported")
return model
def _generate_card(self) -> ModelCard:
From da8d30260dc619d97d1ba35faf796259e54aa870 Mon Sep 17 00:00:00 2001
From: p-mishra1 <87666586+p-mishra1@users.noreply.github.com>
Date: Thu, 20 Oct 2022 21:40:29 +0530
Subject: [PATCH 4/6] test addition for card class
---
skops/card/_model_card.py | 112 +++++++++++++++++++++++++---------
skops/card/tests/test_card.py | 26 ++++----
2 files changed, 97 insertions(+), 41 deletions(-)
diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py
index 3a9e111f..2d0d64ee 100644
--- a/skops/card/_model_card.py
+++ b/skops/card/_model_card.py
@@ -16,6 +16,7 @@
from tabulate import tabulate # type: ignore
import skops
+from skops.io import load
# Repr attributes can be used to control the behavior of repr
aRepr = Repr()
@@ -26,7 +27,10 @@
def wrap_as_details(text: str, folded: bool) -> str:
if not folded:
return text
- return f"\n Click to expand
\n\n{text}\n\n "
+ return (
+ "\n Click to expand"
+ f"
\n\n{text}\n\n "
+ )
def _clean_table(table: str) -> str:
@@ -93,7 +97,9 @@ def format(self) -> str:
headers = self.table.keys()
table = _clean_table(
- tabulate(self.table, tablefmt="github", headers=headers, showindex=False)
+ tabulate(
+ self.table, tablefmt="github", headers=headers, showindex=False
+ )
)
return wrap_as_details(table, folded=self.folded)
@@ -151,7 +157,9 @@ def metadata_from_config(config_path: Union[str, Path]) -> CardData:
task = config.get("sklearn", {}).get("task", None)
if task:
card_data.tags += [task]
- card_data.model_file = config.get("sklearn", {}).get("model", {}).get("file")
+ card_data.model_file = (
+ config.get("sklearn", {}).get("model", {}).get("file")
+ )
example_input = config.get("sklearn", {}).get("example_input", None)
# Documentation on what the widget expects:
# https://huggingface.co/docs/hub/models-widgets-examples
@@ -173,7 +181,7 @@ class Card:
Parameters
----------
- model: estimator object
+ model: pathlib.Path, str, or sklearn estimator object
Model that will be documented.
model_diagram: bool, default=True
@@ -255,7 +263,7 @@ def __init__(
model_diagram: bool = True,
metadata: Optional[CardData] = None,
) -> None:
- self.model = self._load_model(model)
+ self._model = model
self.model_diagram = model_diagram
self._eval_results = {} # type: ignore
self._template_sections: dict[str, str] = {}
@@ -303,11 +311,15 @@ def add_plot(self, folded=False, **kwargs: str) -> "Card":
Card object.
"""
for plot_name, plot_path in kwargs.items():
- section = PlotSection(alt_text=plot_name, path=plot_path, folded=folded)
+ section = PlotSection(
+ alt_text=plot_name, path=plot_path, folded=folded
+ )
self._extra_sections.append((plot_name, section))
return self
- def add_table(self, folded: bool = False, **kwargs: dict["str", list[Any]]) -> Card:
+ def add_table(
+ self, folded: bool = False, **kwargs: dict["str", list[Any]]
+ ) -> Card:
"""Add a table to the model card.
Add a table to the model card. This can be especially useful when you
@@ -374,29 +386,54 @@ def add_metrics(self, **kwargs: str) -> "Card":
self._eval_results[metric] = value
return self
+ @property
+ def model(self):
+ model = self._load_model(self._model)
+ if model is not self._model:
+ self._model = model
+ return model
+
+ @model.setter
+ def model(self, model):
+ self._model = model
+
+ @model.deleter
+ def model(self):
+ del self._model
+
def _load_model(self, model: Any) -> Any:
- """Loads the model if provided a file path.
+ """Load the model if given a file path; if it is already a model
+ instance, return it unmodified.
Parameters
----------
- model : Any
- Str or model instance.
+ model : pathlib.Path, str, or sklearn estimator
+ Path/str or the actual model instance. If a Path or str, the model is loaded from that file.
Returns
-------
- model : object
+ model : object
Model instance.
+
"""
- if isinstance(model, str):
- model_path = Path(model)
- if not model_path.exists():
- raise ValueError("Model file does not exist")
- if model_path.suffix==".pkl":
- model = joblib.load(model_path)
- elif model_path.suffix==".skops":
- model = skops.io.load(model_path)
- else:
- raise ValueError("Model Format not supported")
+ if not isinstance(model, (Path, str)):
+ return model
+
+ model_path = Path(model)
+ if not model_path.exists():
+ raise ValueError("Model file does not exist")
+
+ if model_path.suffix in (".pkl", ".pickle"):
+ model = joblib.load(model_path)
+ elif model_path.suffix == ".skops":
+ model = load(model_path)
+ else:
+ msg = (
+ f"Cannot interpret model suffix {model_path.suffix}, should be"
+ " '.pkl', '.pickle' or '.skops'"
+ )
+ raise ValueError(msg)
+
return model
def _generate_card(self) -> ModelCard:
@@ -429,18 +466,23 @@ def _generate_card(self) -> ModelCard:
)
else:
template_sections["get_started_code"] = (
- "import joblib\nimport json\nimport pandas as pd\nclf ="
- f' joblib.load({model_file})\nwith open("config.json") as'
+ "import joblib\nimport json\nimport pandas as"
+ " pd\nclf ="
+ f" joblib.load({model_file})\nwith"
+ ' open("config.json") as'
" f:\n "
" config ="
" json.load(f)\n"
'clf.predict(pd.DataFrame.from_dict(config["sklearn"]["example_input"]))'
)
if self.model_diagram is True:
- model_plot_div = re.sub(r"\n\s+", "", str(estimator_html_repr(self.model)))
+ model_plot_div = re.sub(
+ r"\n\s+", "", str(estimator_html_repr(self.model))
+ )
if model_plot_div.count("sk-top-container") == 1:
model_plot_div = model_plot_div.replace(
- "sk-top-container", 'sk-top-container" style="overflow: auto;'
+ "sk-top-container",
+ 'sk-top-container" style="overflow: auto;',
)
model_plot: str | None = model_plot_div
else:
@@ -465,7 +507,9 @@ def _generate_card(self) -> ModelCard:
f"{tmpdirname}/temporary_template.md",
)
# create a temporary template with the additional plots
- template_sections["template_path"] = f"{tmpdirname}/temporary_template.md"
+ template_sections[
+ "template_path"
+ ] = f"{tmpdirname}/temporary_template.md"
# add extra sections at the end of the template
with open(template_sections["template_path"], "a") as template:
if self._extra_sections:
@@ -546,13 +590,17 @@ def __repr__(self) -> str:
model = getattr(self, "model", None)
if model:
model_str = self._strip_blank(repr(model))
- model_repr = aRepr.repr(f" model={model_str},").strip('"').strip("'")
+ model_repr = (
+ aRepr.repr(f" model={model_str},").strip('"').strip("'")
+ )
else:
model_repr = None
# metadata
metadata_reprs = []
- for key, val in self.metadata.to_dict().items() if self.metadata else {}:
+ for key, val in (
+ self.metadata.to_dict().items() if self.metadata else {}
+ ):
if key == "widget":
metadata_reprs.append(" metadata.widget={...},")
continue
@@ -566,14 +614,18 @@ def __repr__(self) -> str:
template_reprs = []
for key, val in self._template_sections.items():
val = self._strip_blank(repr(val))
- template_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'"))
+ template_reprs.append(
+ aRepr.repr(f" {key}={val},").strip('"').strip("'")
+ )
template_repr = "\n".join(template_reprs)
# figures
figure_reprs = []
for key, val in self._extra_sections:
val = self._strip_blank(repr(val))
- figure_reprs.append(aRepr.repr(f" {key}={val},").strip('"').strip("'"))
+ figure_reprs.append(
+ aRepr.repr(f" {key}={val},").strip('"').strip("'")
+ )
figure_repr = "\n".join(figure_reprs)
complete_repr = "Card(\n"
diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
index e2ed4596..f5f2fb4e 100644
--- a/skops/card/tests/test_card.py
+++ b/skops/card/tests/test_card.py
@@ -77,7 +77,9 @@ def _create_model_card_from_saved_model(
task="tabular-classification",
data=X,
)
- card = Card(iris_estimator, metadata=metadata_from_config(destination_path))
+ card = Card(
+ iris_estimator, metadata=metadata_from_config(destination_path)
+ )
card.save(Path(destination_path) / "README.md")
return card
@@ -206,7 +208,9 @@ def test_add_metrics(destination_path, model_card):
assert ("acc" in card) and ("f1" in card) and ("0.1" in card)
-def test_code_autogeneration(destination_path, pkl_model_card_metadata_from_config):
+def test_code_autogeneration(
+ destination_path, pkl_model_card_metadata_from_config
+):
# test if getting started code is automatically generated
metadata = metadata_load(local_path=Path(destination_path) / "README.md")
filename = metadata["model_file"]
@@ -288,14 +292,12 @@ def test_very_long_lines_are_shortened(self, card: Card, meth):
card.add(my_section="very long line " * 100)
result = meth(card)
expected = (
- "Card(\n model=LinearRegression(fit_intercept=False),\n"
- " model_description='A description',\n model_card_authors='Jane Doe',\n"
- " my_section='very long line very lon...line very long line very long line"
- " ',\n"
- " roc_curve='ROC_curve.png',\n"
- " confusion_matrix='confusion_matrix.jpg',\n"
- " search_results=Table(3x2),\n"
- ")"
+ "Card(\n model=LinearRegression(fit_intercept=False),\n "
+ " model_description='A description',\n model_card_authors='Jane"
+ " Doe',\n my_section='very long line very lon...line very long"
+ " line very long line ',\n roc_curve='ROC_curve.png',\n "
+ " confusion_matrix='confusion_matrix.jpg',\n "
+ " search_results=Table(3x2),\n)"
)
assert result == expected
@@ -412,7 +414,9 @@ def test_format_path_is_str(self):
assert section.format() == expected
def test_format_path_is_pathlib(self):
- section = PlotSection(alt_text="some title", path=Path("path") / "plot.png")
+ section = PlotSection(
+ alt_text="some title", path=Path("path") / "plot.png"
+ )
expected = f""
assert section.format() == expected
From 4051cff18518a1a78d89995674c834f4a3ec44f3 Mon Sep 17 00:00:00 2001
From: know-one-1
Date: Tue, 25 Oct 2022 17:24:27 +0530
Subject: [PATCH 5/6] Revert "test addition for card class"
This reverts commit d6c98569ae4f6c3263501d9dda3714cfdbfb116c.
---
skops/card/tests/test_card.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
index f5f2fb4e..4c255ce7 100644
--- a/skops/card/tests/test_card.py
+++ b/skops/card/tests/test_card.py
@@ -211,6 +211,7 @@ def test_add_metrics(destination_path, model_card):
def test_code_autogeneration(
destination_path, pkl_model_card_metadata_from_config
):
+
# test if getting started code is automatically generated
metadata = metadata_load(local_path=Path(destination_path) / "README.md")
filename = metadata["model_file"]
From 3c93add5987347746cf68894184eafa5d8d0fe66 Mon Sep 17 00:00:00 2001
From: know-one-1
Date: Tue, 25 Oct 2022 22:29:32 +0530
Subject: [PATCH 6/6] load model from path functionality and test addition
---
skops/card/tests/test_card.py | 398 +++++++++++++++++++++++++++++++++-
1 file changed, 390 insertions(+), 8 deletions(-)
diff --git a/skops/card/tests/test_card.py b/skops/card/tests/test_card.py
index 4c255ce7..a92ebe47 100644
--- a/skops/card/tests/test_card.py
+++ b/skops/card/tests/test_card.py
@@ -33,6 +33,19 @@ def model_card(model_diagram=True):
yield card
+@pytest.fixture
+def model_card_from_path(suffix, model_diagram=True):
+ model = fit_model()
+ save_file = tempfile.mkstemp(suffix=suffix, prefix="skops-test")[1]
+ if suffix in (".pkl", ".pickle"):
+ with open(save_file, "wb") as f:
+ pickle.dump(model, f)
+ elif suffix == ".skops":
+ dump(model, save_file)
+ card = Card(save_file, model_diagram)
+ yield card
+
+
@pytest.fixture
def iris_data():
X, y = load_iris(return_X_y=True, as_frame=True)
@@ -77,9 +90,25 @@ def _create_model_card_from_saved_model(
task="tabular-classification",
data=X,
)
- card = Card(
- iris_estimator, metadata=metadata_from_config(destination_path)
+ card = Card(iris_estimator, metadata=metadata_from_config(destination_path))
+ card.save(Path(destination_path) / "README.md")
+ return card
+
+
+def _create_model_card_from_model_path(
+ destination_path,
+ iris_data,
+ save_file,
+):
+ X, y = iris_data
+ hub_utils.init(
+ model=save_file,
+ requirements=[f"scikit-learn=={sklearn.__version__}"],
+ dst=destination_path,
+ task="tabular-classification",
+ data=X,
)
+ card = Card(save_file, metadata=metadata_from_config(destination_path))
card.save(Path(destination_path) / "README.md")
return card
@@ -102,6 +131,22 @@ def pkl_model_card_metadata_from_config(
)
+@pytest.fixture
+def skops_model_card_from_path_metadata_from_config(
+ destination_path, iris_skops_file, iris_data
+):
+ yield _create_model_card_from_model_path(
+ destination_path, iris_data, iris_skops_file
+ )
+
+
+@pytest.fixture
+def pkl_model_card_from_path_metadata_from_config(
+ destination_path, iris_pkl_file, iris_data
+):
+ yield _create_model_card_from_model_path(destination_path, iris_data, iris_pkl_file)
+
+
@pytest.fixture
def destination_path():
with tempfile.TemporaryDirectory(prefix="skops-test") as dir_path:
@@ -113,11 +158,23 @@ def test_save_model_card(destination_path, model_card):
assert (Path(destination_path) / "README.md").exists()
+@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"])
+def test_save_model_card_from_path(destination_path, model_card_from_path):
+ model_card_from_path.save(Path(destination_path) / "README.md")
+ assert (Path(destination_path) / "README.md").exists()
+
+
def test_hyperparameter_table(destination_path, model_card):
model_card = model_card.render()
assert "fit_intercept" in model_card
+@pytest.mark.parametrize("suffix", [".pkl", ".pickle", ".skops"])
+def test_hyperparameter_table_from_path(model_card_from_path):
+ model_card_from_path = model_card_from_path.render()
+ assert "fit_intercept" in model_card_from_path
+
+
def _strip_multiple_chars(text, char):
# _strip_multiple_chars("hi there") == "hi there"
# _strip_multiple_chars("|---|--|", "-") == "|-|-|"
@@ -146,17 +203,44 @@ def test_plot_model(destination_path, model_card):
assert "