diff --git a/docs/changes.rst b/docs/changes.rst index aed3035e..b2bdfe68 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -11,6 +11,7 @@ skops Changelog v0.6 ---- +- Added tabular regression example. :pr:`254` by `Thomas Lazarus` v0.5 ---- @@ -102,4 +103,4 @@ Contributors :user:`Adrin Jalali `, :user:`Merve Noyan `, :user:`Benjamin Bossan `, :user:`Ayyuce Demirbas `, :user:`Prajjwal Mishra `, :user:`Francesco Cariaggi `, -:user:`Erin Aho ` +:user:`Erin Aho `, :user:`Thomas Lazarus ` diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 00000000..07cc0f1f --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,17 @@ +.. _examples: + +Examples of interactions with the Hugging Face Hub +================================================== + +- Creating the Model Card: + :ref:`sphx_glr_auto_examples_plot_model_card.py` is an example of using + skops to create a model card that can be used on the Hugging Face Hub. +- Putting the Model Card on the Hub: + :ref:`sphx_glr_auto_examples_plot_hf_hub.py` is an example of using skops + to put a model card on the Hugging Face Hub. +- Tabular Regression: + :ref:`sphx_glr_auto_examples_plot_tabular_regression.py` is an example of using skops to serialize a tabular + regression model and create a model card and a Hugging Face Hub repository. +- Text Classification: + :ref:`sphx_glr_auto_examples_plot_text_classification.py` is an example of using skops to serialize a text + classification model and create a model card and a Hugging Face Hub repository. 
diff --git a/docs/index.rst b/docs/index.rst index 0bd2efb7..0d6f3cf9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,6 +23,7 @@ The following examples are good starting points: :ref:`sphx_glr_auto_examples_plot_model_card.py` - A text classification example, and its integration with the hub: :ref:`sphx_glr_auto_examples_plot_text_classification.py` +- More examples :ref:`here <examples>` In order to better understand the role of each file and their content when uploaded to Hugging Face Hub, refer to this :ref:`user guide `. You can @@ -40,6 +41,7 @@ User Guide / API Reference model_card persistence modules/classes + examples Community / About ================= diff --git a/examples/plot_tabular_regression.py b/examples/plot_tabular_regression.py new file mode 100644 index 00000000..5e9d8fb0 --- /dev/null +++ b/examples/plot_tabular_regression.py @@ -0,0 +1,153 @@ +""" +Tabular Regression with scikit-learn +------------------------------------- + +This example shows how you can create a Hugging Face Hub compatible repo for a +tabular regression task using scikit-learn. We also show how you can generate +a model card for the model and the task at hand. +""" + +# %% +# Imports +# ======= +# First we will import everything required for the rest of this document. + +from pathlib import Path +from tempfile import mkdtemp, mkstemp + +import matplotlib.pyplot as plt +import pandas as pd +import sklearn +from sklearn.datasets import load_diabetes +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score +from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler + +import skops.io as sio +from skops import card, hub_utils + +# %% +# Data +# ==== +# We will use the diabetes dataset from scikit-learn. 
+ +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 +) + +# %% +# Train a Model +# ============= +# To train a model, we first need to standardize our features. We will use +# StandardScaler in our pipeline. We will fit a Linear Regression model with the outputs of the scaler. +model = Pipeline( + [ + ("scaler", StandardScaler()), + ("linear_regression", LinearRegression()), + ] +) + +model.fit(X_train, y_train) + +# %% +# Inference +# ========= +# Let's see if the model works. +y_pred = model.predict(X_test[:5]) +print(y_pred) + +# %% +# Initialize a repository to save our files in +# ============================================ +# We will now initialize a repository and save our model +_, pkl_name = mkstemp(prefix="skops-", suffix=".pkl") + +with open(pkl_name, mode="bw") as f: + sio.dump(model, file=f) + +local_repo = mkdtemp(prefix="skops-") + +hub_utils.init( + model=pkl_name, + requirements=[f"scikit-learn={sklearn.__version__}"], + dst=local_repo, + task="tabular-regression", + data=X_test, +) + +if "__file__" in locals(): # __file__ not defined during docs build + # Add this script itself to the files to be uploaded for reproducibility + hub_utils.add_files(__file__, dst=local_repo) + +# %% +# Create a model card +# =================== +# We now create a model card, and populate its metadata with information which +# is already provided in ``config.json``, which itself is created by the call to +# :func:`.hub_utils.init` above. We will see below how we can populate the model +# card with useful information. + +model_card = card.Card(model, metadata=card.metadata_from_config(Path(local_repo))) + +# %% +# Add more information +# ==================== +# So far, the model card does not tell viewers a lot about the model. Therefore, +# we add more information about the model, like a description and what its +# license is. 
+ +model_card.metadata.license = "mit" +limitations = ( + "This model is made for educational purposes and is not ready to be used in" + " production." +) +model_description = ( + "This is a Linear Regression model trained on diabetes dataset. This model could be" + " used to predict the progression of diabetes. This model is pretty limited and" + " should just be used as an example of how to user `skops` and Hugging Face Hub." +) +model_card_authors = "skops_user, lazarust" +citation_bibtex = "bibtex\n@inproceedings{...,year={2022}}" +model_card.add( + **{ + "Model Card Authors": model_card_authors, + "Intended uses & limitations": limitations, + "Citation": citation_bibtex, + "Model description": model_description, + "Model description/Intended uses & limitations": limitations, + } +) + +# %% +# Add plots, metrics, and tables to our model card +# ================================================ +# We will now evaluate our model and add our findings to the model card. + +y_pred = model.predict(X_test) + +# plot the predicted values against the true values +plt.scatter(y_test, y_pred) +plt.xlabel("True values") +plt.ylabel("Predicted values") +plt.savefig(Path(local_repo) / "prediction_scatter.png") +model_card.add_plot(**{"Prediction Scatter": "prediction_scatter.png"}) + +mae = mean_absolute_error(y_test, y_pred) +mse = mean_squared_error(y_test, y_pred) +r2 = r2_score(y_test, y_pred) +model_card.add_metrics( + **{"Mean Absolute Error": mae, "Mean Squared Error": mse, "R-Squared Score": r2} +) + +# %% +# Save model card +# ================ +# We can simply save our model card by providing a path to :meth:`.Card.save`. +# The model hasn't been pushed to Hugging Face Hub yet. If you want to see how +# to push your models, please refer to +# :ref:`this example <sphx_glr_auto_examples_plot_hf_hub.py>`. + +model_card.save(Path(local_repo) / "README.md")