From 5588cac8340a38129f1e2e7b11cd6a5dd6865b82 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Wed, 14 Dec 2022 18:53:35 -0600 Subject: [PATCH 01/11] Adds tabular regression example --- examples/plot_tabular_regression.py | 152 ++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 examples/plot_tabular_regression.py diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py new file mode 100644 index 00000000..d121cd67 --- /dev/null +++ b/examples/plot_tabular_regression.py @@ -0,0 +1,152 @@ +""" +Tabular Regression with scikit-learn +------------------------------------- + +This example shows how you can create a Hugging Face Hub compatible repo for a +tabular regression task using scikit-learn. We also show how you can generate +a model card for the model and the task at hand. +""" + +# %% +# Imports +# ======= +# First we will import everything required for the rest of this document. + +import pickle +from pathlib import Path +from tempfile import mkdtemp, mkstemp + +import sklearn +import pandas as pd +from sklearn.datasets import load_diabetes +from sklearn.metrics import ( + mean_absolute_error, + mean_squared_error, + r2_score +) +from sklearn.datasets import load_diabetes +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import Pipeline + +import matplotlib.pyplot as plt +from skops import card, hub_utils + +# %% +# Data +# ==== +# We will use diabetes dataset from sklearn. + +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + +# %% +# Train a Model +# ============= +# To train a model, we need to convert our data first to vectors. We will use +# StandardScalar in our pipeline. We will fit a Linear Regression model with the outputs of the scalar. 
+model = Pipeline([ + ('scaler', StandardScaler()), + ('linear_regression', LinearRegression()), +]) + +model.fit(X_train, y_train) + +# %% +# Inference +# ========= +# Let's see if the model works. +prediction_data = [[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]] +prediction = model.predict(prediction_data) +print(prediction) + +# %% +# Initialize a repository to save our files in +# ============================================ +# We will now initialize a repository and save our model +_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl") + +with open(pkl_name, mode="bw") as f: + pickle.dump(model, file=f) + +local_repo = mkdtemp(prefix="skops-") + +hub_utils.init( + model=pkl_name, + requirements=[f"scikit-learn={sklearn.__version__}"], + dst=local_repo, + task="tabular-regression", + data=X_test, +) + +# %% +# Create a model card +# =================== +# We now create a model card, and populate its metadata with information which +# is already provided in ``config.json``, which itself is created by the call to +# :func:`.hub_utils.init` above. We will see below how we can populate the model +# card with useful information. + +model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo))) + +# %% +# Add more information +# ==================== +# So far, the model card does not tell viewers a lot about the model. Therefore, +# we add more information about the model, like a description and what its +# license is. + +model_card.metadata.license = "mit" +limitations = "This model is not ready to be used in production." +model_description = ( + "This is a Linear Regression model trained on diabetes dataset." 
+) +model_card_authors = "skops_user" +get_started_code = ( + "import pickle\nwith open(pkl_filename, 'rb') as file:\n clf = pickle.load(file)" +) +citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}" +model_card.add( + citation_bibtex=citation_bibtex, + get_started_code=get_started_code, + model_card_authors=model_card_authors, + limitations=limitations, + model_description=model_description, +) + +# %% +# Add plots, metrics, and tables to our model card +# ================================================ +# We will now evaluate our model and add our findings to the model card. + +y_pred = model.predict(X_test) +eval_descr = ( + "The model is evaluated on validation data from 20 news group's test split," + " using accuracy and F1-score with micro average." +) +model_card.add(eval_method=eval_descr) + + +# plot the predicted values against the true values +plt.scatter(y_test, y_pred) +plt.xlabel('True values') +plt.ylabel('Predicted values') +plt.savefig(Path(local_repo) / "prediction_scatter.png") +model_card.add_plot(**{"Confusion matrix": "prediction_scatter.png"}) + +mae = mean_absolute_error(y_test, y_pred) +mse = mean_squared_error(y_test, y_pred) +r2 = r2_score(y_test, y_pred) +model_card.add_metrics(**{"mean absolute error": mae, "mean squared error": mse, "r2 score": r2}) + +# %% +# Save model card +# ================ +# We can simply save our model card by providing a path to :meth:`.Card.save`. +# The model hasn't been pushed to Hugging Face Hub yet, if you want to see how +# to push your models please refer to +# :ref:`this example `. 
+ +# model_card.save(Path(local_repo) / "README.md") +model_card.save("./README.md") \ No newline at end of file From 263b033b41651bc3677ca9445b3af8950c25de4f Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Wed, 14 Dec 2022 18:56:16 -0600 Subject: [PATCH 02/11] Updates changes.rst --- docs/changes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changes.rst b/docs/changes.rst index c8397a0f..7a77a751 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -11,6 +11,7 @@ skops Changelog v0.6 ---- +- Added tabular regression example. :pr: `254` by `Thomas Lazarus` v0.5 ---- @@ -102,4 +103,4 @@ Contributors :user:`Adrin Jalali `, :user:`Merve Noyan `, :user:`Benjamin Bossan `, :user:`Ayyuce Demirbas `, :user:`Prajjwal Mishra `, :user:`Francesco Cariaggi `, -:user:`Erin Aho ` +:user:`Erin Aho `, :user:`Thomas Lazarus ` From f2fbe0e09362a36d2a98659fc360a70a840a0ff2 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Thu, 15 Dec 2022 18:19:00 -0600 Subject: [PATCH 03/11] Uses skops.io instead of pickle Also updates some of the model card text Updates model card to use new sections --- examples/plot_tabular_regression.py | 42 +++++++++++++---------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py index d121cd67..0738c438 100644 --- a/examples/plot_tabular_regression.py +++ b/examples/plot_tabular_regression.py @@ -12,7 +12,6 @@ # ======= # First we will import everything required for the rest of this document. 
-import pickle from pathlib import Path from tempfile import mkdtemp, mkstemp @@ -22,7 +21,7 @@ from sklearn.metrics import ( mean_absolute_error, mean_squared_error, - r2_score + r2_score, ) from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split @@ -32,6 +31,7 @@ import matplotlib.pyplot as plt from skops import card, hub_utils +import skops.io as sio # %% # Data @@ -68,7 +68,7 @@ _, pkl_name = mkstemp(prefix="skops-", suffix=".pkl") with open(pkl_name, mode="bw") as f: - pickle.dump(model, file=f) + sio.dump(model, file=f) local_repo = mkdtemp(prefix="skops-") @@ -98,22 +98,25 @@ # license is. model_card.metadata.license = "mit" -limitations = "This model is not ready to be used in production." +limitations = "This model is not ready to be used in production since it is relatively basic." model_description = ( "This is a Linear Regression model trained on diabetes dataset." + " This model could be used to predict the progression of diabetes." + " This model is pretty limited and should just be used as an example of how to user `skops` and Hugging Face Hub." 
) -model_card_authors = "skops_user" +model_card_authors = "skops_user, lazarust" get_started_code = ( - "import pickle\nwith open(pkl_filename, 'rb') as file:\n clf = pickle.load(file)" -) -citation_bibtex = "bibtex\n@inproceedings{...,year={2020}}" -model_card.add( - citation_bibtex=citation_bibtex, - get_started_code=get_started_code, - model_card_authors=model_card_authors, - limitations=limitations, - model_description=model_description, + "import skops.io as sio \nwith open(pkl_filename, 'rb') as file:\n clf = sio.load(file)" ) +citation_bibtex = "bibtex\n@inproceedings{...,year={2022}}" +model_card.add(**{ + "How to Get Started with the Model": get_started_code, + "Model Card Authors": model_card_authors, + "Intended uses & limitations": limitations, + "Citation": citation_bibtex, + "Model description": model_description, + "Model description/Intended uses & limitations": limitations, +}) # %% # Add plots, metrics, and tables to our model card @@ -121,19 +124,13 @@ # We will now evaluate our model and add our findings to the model card. y_pred = model.predict(X_test) -eval_descr = ( - "The model is evaluated on validation data from 20 news group's test split," - " using accuracy and F1-score with micro average." -) -model_card.add(eval_method=eval_descr) - # plot the predicted values against the true values plt.scatter(y_test, y_pred) plt.xlabel('True values') plt.ylabel('Predicted values') plt.savefig(Path(local_repo) / "prediction_scatter.png") -model_card.add_plot(**{"Confusion matrix": "prediction_scatter.png"}) +model_card.add_plot(**{"Prediction Scatter": "prediction_scatter.png"}) mae = mean_absolute_error(y_test, y_pred) mse = mean_squared_error(y_test, y_pred) @@ -148,5 +145,4 @@ # to push your models please refer to # :ref:`this example `. 
-# model_card.save(Path(local_repo) / "README.md") -model_card.save("./README.md") \ No newline at end of file +model_card.save(Path(local_repo) / "README.md") \ No newline at end of file From 5c75d7c3c9c89c3cfa3815a48ab26e0018cf4c6b Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 3 Jan 2023 19:02:34 -0600 Subject: [PATCH 04/11] Cleans up text and formatting --- examples/plot_tabular_regression.py | 81 +++++++++++++++-------------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py index 0738c438..5e9d8fb0 100644 --- a/examples/plot_tabular_regression.py +++ b/examples/plot_tabular_regression.py @@ -15,23 +15,18 @@ from pathlib import Path from tempfile import mkdtemp, mkstemp -import sklearn +import matplotlib.pyplot as plt import pandas as pd +import sklearn from sklearn.datasets import load_diabetes -from sklearn.metrics import ( - mean_absolute_error, - mean_squared_error, - r2_score, -) -from sklearn.datasets import load_diabetes -from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression -from sklearn.preprocessing import StandardScaler +from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler -import matplotlib.pyplot as plt -from skops import card, hub_utils import skops.io as sio +from skops import card, hub_utils # %% # Data @@ -39,17 +34,21 @@ # We will use diabetes dataset from sklearn. X, y = load_diabetes(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 +) # %% # Train a Model # ============= # To train a model, we need to convert our data first to vectors. We will use # StandardScalar in our pipeline. 
We will fit a Linear Regression model with the outputs of the scalar. -model = Pipeline([ - ('scaler', StandardScaler()), - ('linear_regression', LinearRegression()), -]) +model = Pipeline( + [ + ("scaler", StandardScaler()), + ("linear_regression", LinearRegression()), + ] +) model.fit(X_train, y_train) @@ -57,9 +56,8 @@ # Inference # ========= # Let's see if the model works. -prediction_data = [[100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]] -prediction = model.predict(prediction_data) -print(prediction) +y_pred = model.predict(X_test[:5]) +print(y_pred) # %% # Initialize a repository to save our files in @@ -80,6 +78,10 @@ data=X_test, ) +if "__file__" in locals(): # __file__ not defined during docs build + # Add this script itself to the files to be uploaded for reproducibility + hub_utils.add_files(__file__, dst=local_repo) + # %% # Create a model card # =================== @@ -98,25 +100,26 @@ # license is. model_card.metadata.license = "mit" -limitations = "This model is not ready to be used in production since it is relatively basic." +limitations = ( + "This model is made for educational purposes and is not ready to be used in" + " production." +) model_description = ( - "This is a Linear Regression model trained on diabetes dataset." - " This model could be used to predict the progression of diabetes." - " This model is pretty limited and should just be used as an example of how to user `skops` and Hugging Face Hub." + "This is a Linear Regression model trained on diabetes dataset. This model could be" + " used to predict the progression of diabetes. This model is pretty limited and" + " should just be used as an example of how to use `skops` and Hugging Face Hub." 
) model_card_authors = "skops_user, lazarust" -get_started_code = ( - "import skops.io as sio \nwith open(pkl_filename, 'rb') as file:\n clf = sio.load(file)" -) citation_bibtex = "bibtex\n@inproceedings{...,year={2022}}" -model_card.add(**{ - "How to Get Started with the Model": get_started_code, - "Model Card Authors": model_card_authors, - "Intended uses & limitations": limitations, - "Citation": citation_bibtex, - "Model description": model_description, - "Model description/Intended uses & limitations": limitations, -}) +model_card.add( + **{ + "Model Card Authors": model_card_authors, + "Intended uses & limitations": limitations, + "Citation": citation_bibtex, + "Model description": model_description, + "Model description/Intended uses & limitations": limitations, + } +) # %% # Add plots, metrics, and tables to our model card @@ -127,15 +130,17 @@ # plot the predicted values against the true values plt.scatter(y_test, y_pred) -plt.xlabel('True values') -plt.ylabel('Predicted values') +plt.xlabel("True values") +plt.ylabel("Predicted values") plt.savefig(Path(local_repo) / "prediction_scatter.png") model_card.add_plot(**{"Prediction Scatter": "prediction_scatter.png"}) mae = mean_absolute_error(y_test, y_pred) mse = mean_squared_error(y_test, y_pred) r2 = r2_score(y_test, y_pred) -model_card.add_metrics(**{"mean absolute error": mae, "mean squared error": mse, "r2 score": r2}) +model_card.add_metrics( + **{"Mean Absolute Error": mae, "Mean Squared Error": mse, "R-Squared Score": r2} +) # %% # Save model card @@ -145,4 +150,4 @@ # to push your models please refer to # :ref:`this example `. 
-model_card.save(Path(local_repo) / "README.md") \ No newline at end of file +model_card.save(Path(local_repo) / "README.md") From 1ab80e4100c48975a722617a7b8273a2d16808e3 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Wed, 25 Jan 2023 19:40:13 -0600 Subject: [PATCH 05/11] Adds Examples to documentation --- docs/examples.rst | 9 +++++++++ docs/index.rst | 1 + 2 files changed, 10 insertions(+) create mode 100644 docs/examples.rst diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 00000000..313ad631 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,9 @@ +.. _examples: + +Examples of using skops +======================= + +- Tabular Regression: + `Here `_ is an example of using skops to serialize a tabular regression model and create a model card and a Hugging Face Hub repository. +- Text Classification: + `Here `_ is an example of using skops to serialize a text classification model and create a model card and a Hugging Face Hub repository. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 0bd2efb7..6bbc5451 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,6 +40,7 @@ User Guide / API Reference model_card persistence modules/classes + examples Community / About ================= From bff9f262d4cd3ba916ea1a2afcb16fccc504673a Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Thu, 26 Jan 2023 21:28:34 -0600 Subject: [PATCH 06/11] Adds all examples from auto example page --- docs/examples.rst | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index 313ad631..2d63e0b5 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -1,9 +1,21 @@ .. 
_examples: -Examples of using skops -======================= +Examples of interactions with the Hugging Face Hub +================================================== +- Creating the Model Card: + `Here `_ is an example of using skops to create a model card that can + be used on the Hugging Face Hub. +- Putting the Model Card on the Hub: + `Here `_ is an example of using skops to put a model card on the Hugging Face + Hub. - Tabular Regression: - `Here `_ is an example of using skops to serialize a tabular regression model and create a model card and a Hugging Face Hub repository. + `Here `_ is an example of using skops to serialize a tabular + regression model and create a model card and a Hugging Face Hub repository. - Text Classification: - `Here `_ is an example of using skops to serialize a text classification model and create a model card and a Hugging Face Hub repository. \ No newline at end of file + `Here `_ is an example of using skops to serialize a text classi + fication model and create a model card and a Hugging Face Hub repository. From 2ab59417e421b6dc5c26b2abfa8ba7265203f519 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Sat, 28 Jan 2023 10:39:36 -0600 Subject: [PATCH 07/11] Updates to link to docs --- docs/examples.rst | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/docs/examples.rst b/docs/examples.rst index 2d63e0b5..16aee5af 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -4,18 +4,14 @@ Examples of interactions with the Hugging Face Hub ================================================== - Creating the Model Card: - `Here `_ is an example of using skops to create a model card that can - be used on the Hugging Face Hub. + :ref:`sphx_glr_auto_examples_plot_model_card.py` is an example of using + skops to create a model card that can be used on the Hugging Face Hub. - Putting the Model Card on the Hub: - `Here `_ is an example of using skops to put a model card on the Hugging Face - Hub. 
+ :ref:`sphx_glr_auto_examples_plot_hf_hub.py` is an example of using skops + to put a model card on the Hugging Face Hub. - Tabular Regression: - `Here `_ is an example of using skops to serialize a tabular + :ref:`sphx_glr_auto_examples_plot_tabular_regresssion.py` is an example of using skops to serialize a tabular regression model and create a model card and a Hugging Face Hub repository. - Text Classification: - `Here `_ is an example of using skops to serialize a text classi - fication model and create a model card and a Hugging Face Hub repository. + :ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text + classification model and create a model card and a Hugging Face Hub repository. From 50efed5a5c203cf09d494ac5d77c130f1e6bee70 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 30 Jan 2023 20:01:30 -0600 Subject: [PATCH 08/11] Adds link to more examples --- docs/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.rst b/docs/index.rst index 6bbc5451..a0a760c5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,6 +23,7 @@ The following examples are good starting points: :ref:`sphx_glr_auto_examples_plot_model_card.py` - A text classification example, and its integration with the hub: :ref:`sphx_glr_auto_examples_plot_text_classification.py` +- More Examples :ref:`_examples` In order to better understand the role of each file and their content when uploaded to Hugging Face Hub, refer to this :ref:`user guide `. 
You can From f390b49c252809e3bff3537485545bed35dd14ff Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 30 Jan 2023 20:12:31 -0600 Subject: [PATCH 09/11] Fixes link --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index a0a760c5..702ff68b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,7 +23,7 @@ The following examples are good starting points: :ref:`sphx_glr_auto_examples_plot_model_card.py` - A text classification example, and its integration with the hub: :ref:`sphx_glr_auto_examples_plot_text_classification.py` -- More Examples :ref:`_examples` +- More Examples: :ref:`examples` In order to better understand the role of each file and their content when uploaded to Hugging Face Hub, refer to this :ref:`user guide `. You can From 01cf81771eeca3c1d2b7508a6d2d52159b8794f2 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 30 Jan 2023 20:16:26 -0600 Subject: [PATCH 10/11] Fixes link with custom text --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 702ff68b..0d6f3cf9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,7 +23,7 @@ The following examples are good starting points: :ref:`sphx_glr_auto_examples_plot_model_card.py` - A text classification example, and its integration with the hub: :ref:`sphx_glr_auto_examples_plot_text_classification.py` -- More Examples: :ref:`examples` +- More examples :ref:`here <examples>` In order to better understand the role of each file and their content when uploaded to Hugging Face Hub, refer to this :ref:`user guide `. 
You can From c3c853d3eeb2304eac0e5507f10620c62b375153 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus <46943923+lazarust@users.noreply.github.com> Date: Wed, 8 Feb 2023 09:52:28 -0600 Subject: [PATCH 11/11] Update docs/examples.rst Co-authored-by: Benjamin Bossan --- docs/examples.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples.rst b/docs/examples.rst index 16aee5af..07cc0f1f 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -10,7 +10,7 @@ Examples of interactions with the Hugging Face Hub :ref:`sphx_glr_auto_examples_plot_hf_hub.py` is an example of using skops to put a model card on the Hugging Face Hub. - Tabular Regression: - :ref:`sphx_glr_auto_examples_plot_tabular_regresssion.py` is an example of using skops to serialize a tabular + :ref:`sphx_glr_auto_examples_plot_tabular_regression.py` is an example of using skops to serialize a tabular regression model and create a model card and a Hugging Face Hub repository. - Text Classification: :ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text