diff --git a/.github/workflows/deploy-model-card-creator.yml b/.github/workflows/deploy-model-card-creator.yml new file mode 100644 index 00000000..93cddc9f --- /dev/null +++ b/.github/workflows/deploy-model-card-creator.yml @@ -0,0 +1,46 @@ +name: Deploy-Space-Creator + +on: + - push + - pull_request + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + deploy-space-creator: + runs-on: "ubuntu-latest" + if: "github.repository == 'skops-dev/skops'" + # Timeout: https://stackoverflow.com/a/59076067/4521646 + timeout-minutes: 5 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + pip install -e .[docs,tests] + python --version + pip --version + pip list + shell: bash + + - name: Create test skops space creator app + # by default, deploy to skops CI + if: github.ref != 'refs/heads/main' + run: | + python spaces/deploy-skops-model-card-creator.py + + - name: Create main skops space creator app + # if HF_HUB_TOKEN_SKLEARN, use that instead of skops CI orga + if: github.ref == 'refs/heads/main' + env: + HF_HUB_TOKEN_SKLEARN: ${{ secrets.HF_HUB_TOKEN_SKLEARN }} + run: | + python spaces/deploy-skops-model-card-creator.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d95e8edf..4026f7dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,4 +27,5 @@ repos: hooks: - id: mypy args: [--config-file=pyproject.toml] + exclude: "spaces/" additional_dependencies: [types-requests>=2.28.5] diff --git a/pyproject.toml b/pyproject.toml index 1ac0b580..f9abdbf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,6 @@ omit = [ ] [tool.mypy] -exclude = "(\\w+/)*test_\\w+\\.py$|old" +exclude = "(\\w+/)*test_\\w+\\.py$|spaces/skops_model_card_creator|old" ignore_missing_imports = true no_implicit_optional = true diff --git a/setup.py 
b/setup.py index 3cc6aa6a..fba0a56d 100644 --- a/setup.py +++ b/setup.py @@ -80,6 +80,7 @@ def setup_package(): "rich": min_deps.tag_to_packages["rich"], }, include_package_data=True, + packages=["skops"], ) setup(**package_data, **metadata) diff --git a/spaces/README.md b/spaces/README.md new file mode 100644 index 00000000..eff2d640 --- /dev/null +++ b/spaces/README.md @@ -0,0 +1,3 @@ +# Hugging Face Spaces + +Code and script for creating Hugging Face Spaces go here. diff --git a/spaces/deploy-skops-model-card-creator.py b/spaces/deploy-skops-model-card-creator.py new file mode 100644 index 00000000..507c790a --- /dev/null +++ b/spaces/deploy-skops-model-card-creator.py @@ -0,0 +1,49 @@ +# Deploying the app in skops_model_card_creator as a Hugging Face Space requires +# the HF_HUB_TOKEN to be set as environment variable + +import os +from pathlib import Path +from uuid import uuid4 + +from huggingface_hub import HfApi + +import skops +import skops.hub_utils +import skops.hub_utils.tests +from skops.hub_utils.tests.common import HF_HUB_TOKEN + +token = os.environ.get("HF_HUB_TOKEN_SKLEARN") +if token: + print("Deploying space to sklearn orga") +else: + print("Deploying space to skops CI") + token = HF_HUB_TOKEN + +client = HfApi(token=token) +user_name = client.whoami(token=token)["name"] +repo_name = f"skops-model-card-creator-{uuid4()}" +repo_id = f"{user_name}/{repo_name}" +print(f"Creating and pushing to repo: {repo_id}") + +space_repo = Path(skops.__path__[0]).parent / "spaces" / "skops_model_card_creator" + +client.create_repo( + repo_id=repo_id, + token=token, + repo_type="space", + exist_ok=True, + space_sdk="streamlit", +) +out = client.upload_folder( + repo_id=repo_id, + path_in_repo=".", + folder_path=space_repo, + commit_message="Create skops-model-card-creator space", + token=token, + repo_type="space", + create_pr=False, +) + +# link to main app, not to "/tree/main/" +url = out.rsplit("/", 3)[0] +print(f"visit the space at {url}") diff --git 
a/spaces/skops_model_card_creator/README.md b/spaces/skops_model_card_creator/README.md new file mode 100644 index 00000000..9df1fa0e --- /dev/null +++ b/spaces/skops_model_card_creator/README.md @@ -0,0 +1,323 @@ +--- +title: Skops Model Card Creator +emoji: 🐨 +colorFrom: indigo +colorTo: blue +sdk: streamlit +sdk_version: 1.17.0 +app_file: app.py +pinned: false +license: bsd-3-clause +tags: + - sklearn + - skops + - model card +--- + +# Create a Hugging Face model repository for scikit learn models + +This page aims to provide a simple interface to use the +[`skops`](https://skops.readthedocs.io/) model card and HF Hub creation +utilities. + +Below, we will explain the steps involved to create your own model repository to +host your scikit-learn model: + +1. Prepare the model repository +2. Edit the model card +3. Create the model repository on Hugging Face Hub + +## Step 1: Prepare the model repository + +In this step, you do the necessary preparation work to create a [model +repository on Hugging Face Hub](https://huggingface.co/docs/hub/models). + +### Upload a model + +Here you should upload the sklearn model we want to present in the model +repository. The model should be stored either as a ``pickle`` file or it should +use the [secure skops persistence +format](https://skops.readthedocs.io/en/stable/persistence.html). Later, this +model will be uploaded to the model repository so that you can share it with +others. + +The uploaded model should be a scikit-learn model or a model that is compatible +with the sklearn API, e.g. using [XGBoost sklearn +wrapper](https://xgboost.readthedocs.io/en/stable/python/python_api.html#module-xgboost.sklearn) +when it's an XGBoost model. + +If you just want to test out the application and don't want to upload a model, a +dummy model will be used instead. + +### Upload input data + +It's possible to upload input data as a csv file. 
If that is done, the first few +rows of the input data will be used as sample data for the model, e.g. when +trying out the [inference API](https://huggingface.co/inference-api). + + +### Choose the task type + +Choose the type of task that the model is intended to solve. It can be either +classification or regression, with input data being either tabular in nature or +text. + +### Requirements + +This is the list of Python requirements needed to run the model. + +### Choose the model card template + +This is the final step and choosing one of the options will bring you to the +editing step. + +#### Create a new skops model card + +This is the recommended way of getting started. The skops model card template +prefills the model card with some [useful +contents](https://skops.readthedocs.io/en/stable/model_card.html#model-card-content) +that you probably want to have in most model cards. Don't worry: If you don't +like part of the content, you can always edit or delete it later. + +#### Create a new empty model card + +If you want to start the model card completely from scratch, that's also +possible by choosing this option. It will generate a completely empty model card +for you that you can fashion to your liking. + +#### Load existing model card from HF Hub + +If you want to use an existing model card and edit it, that's also possible. +Please enter the Hugging Face Hub repository ID here and the corresponding model +card will be loaded. The repo ID is typically something like `username/reponame`, +e.g. `openai/whisper-small`. Some models also omit the user name, e.g. `gpt2`. + +Note that when you choose an existing model card, a couple of files will be +downloaded, because they may be required to render the model card (e.g. images). +Therefore, depending on the repository, this step may take a bit. + +If you notice any problems when rendering the existing model card, please let us +know by [creating an issue](https://github.com/skops-dev/skops/issues).
+ +## Step 2: Edit the model card + +Before creating the model repository, it is crucial to ensure that the [model +card](https://huggingface.co/blog/model-cards) is edited to best represent the +model you're working on. This can be achieved in the editing step, which is +described in more detail below. + +### Editing sidebar + +In the left sidebar, you will be able to edit the model card, whereas the main +screen is reserved for rendering the model card so that you see what you will +get. We will start by describing the editing sidebar. + +Tip: You should increase the width of the side bar if it is too narrow for your +taste. + +#### Undo, redo & reset + +On top of the side bar, you have the option to undo, redo, and reset the last +operation you did. Say, you accidentally made a change, just press the `Undo` +button to undo this change. Similarly, if you want to undo your undo operation, +press the `Redo` button. Finally, if you press `Reset`, all your operations will +be undone (but don't worry if you click the button accidentally, you can redo +all of them if you want). + +#### Save, create repo & delete + +These buttons are intended for when you finished editing the model card. When +you click on `Save`, you will get the option to download the model card as a +markdown file. + +When clicking the `Create Repo` button, you will be taken to the next screen, +which offers you to create a model repository on Hugging Face Hub. This step +will be explained in more detail further below. + +Finally, you can click on `Delete` to completely discard all the changes you +made and be taken back to the start screen of the app. Be careful, any change +you made will be lost. It is thus advised to first save the model card before +pressing `Delete`. + +#### Edit a section + +Each section has its own form field, which allows you to make edits. Change the +name of the section or change the content (or both), then click `Update` to see +a preview of your change.
As with all other operations, you can undo the change +by clicking on `Undo`. + +#### Delete a section + +Below the form field for editing the section, you will find a `Delete` button +(including the name of the section to make it clear which section it refers to). +If you click that button, the whole section, _including its subsections_, will +be deleted. Again, click on `Undo` if you accidentally deleted something that +you want to keep. + +#### Add section below + +If you click on this button, a new subsection will be created under the current +section. This will create a section with a dummy title and dummy content, which +you can then edit. + +Note that this will create a new _subsection_. If there are already existing +subsections in the current section, the new subsection will be created _below_ +those existing subsections. So the new subsection you create might not appear +exactly where you expect it to appear. To illustrate this, assume that we have +the following sections and subsections: + +- Section A + - Subsection A.1 + - Subsection A.2 +- Section B + +If you create a new section below "Section A", it will be created on the same +level as, and below, "Subsection A.2", resulting in the following structure: + +- Section A + - Subsection A.1 + - Subsection A.2 + - NEW SUBSECTION +- Section B + +If you create a new section below the "Subsection A.1", you will actually create +a sub-subsection, resulting in the following structure instead: + +- Section A + - Subsection A.1 + - NEW SUB-SUBSECTION + - Subsection A.2 +- Section B + +Hopefully, this clarifies things. Unfortunately, there is no possibility (yet) +to re-order sections. + +#### Add figure below + +This button works quite similarly to adding a new section. The main difference +is that instead of having a text area to enter content, you will be asked to +upload an image file. By default, a dummy image will be shown in the preview.
+ +#### Add metrics (only skops template) + +If you have chosen the skops template, you will see an additional field called +`Add metrics` near the top of the side bar. Here you can choose metrics you want +to be shown in the model card, e.g. the accuracy the model achieved on a +specific dataset. Please enter one metric per line, with the metric name on the +left, then an `=` sign, and the value on the right, e.g. `accuracy on test set = +0.85`. + +After pressing `update`, the metrics will be shown in a table in the section +`Model description/Evaluation Results`. You can always add or remove metrics +from this field later. If you want to delete this section completely, look for +its edit form further below and press `Delete`. There, you can also edit the +table in a more fine grained way, e.g. by changing the alignment. + +If you don't use the skops template and still want to add a table, it is +possible to do that, but it requires a bit more work. Add a new section as +described above, then, in the text area, create a table using the [markdown +table +syntax](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-tables#creating-a-table). + +### Model card visualization + +The main part of the page will show you what the final model card will look +like. + +#### Metadata + +On the very top, you can see the metadata of the model card (it is collapsed by +default). The metadata can be very useful for features on the HF Hub, e.g. +allowing other users to find your model by a given tag. + +Right now, it is not possible to edit the metadata directly from here. But don't +worry, once you have created the model card repository, you can easily edit the +metadata there. + +#### Table of Contents + +For your convenience, a table of contents is also shown at the top (collapsed by +default). This is useful if you have a bigger model card and want to see the +overview of all its contents.
+ +#### Markdown preview + +Finally, the model card itself is shown. This is how the model card will look +like once it is saved as markdown and then rendered. + +## Step 3: Creating a model repository + +After you have finished editing the model card, it is time to create a model +repository on Hugging Face Hub. Click on `Create Repo` and you will be taken to +the final step of the process. + +### Back & Delete + +If you find yourself wanting to make more edits to the model card, just click on +the `Back` button and you'll be brought back to the editing step. + +You can also click `Delete`, which will discard all your changes and bring you +back to the start page. Be careful: This step cannot be undone and all your +progress will be lost. + +### Files included in the repository + +For your convenience, this will show a preview of all the files included in the +repository, as well as their sizes. Don't create a repository if you see files +there that you don't want to be uploaded. + +### Privacy settings + +By default, a private repository will be created. If you untick this box, it +will be public instead. More information on what that implies can be found in +the [docs on repository +settings](https://huggingface.co/docs/hub/repositories-settings). + +### Name of the repository + +Here you have to enter the name of the repository. Typically, that's something +like `username/reponame` or `organame/reponame`. This field is mandatory and you +should ensure that the corresponding repository ID does not exist yet. + +### Enter your Hugging Face token + +Here you need to paste your Hugging Face token, which is used for +authentication. The token can be found [here](https://hf.co/settings/token) and +it always starts with "hf_". Entering a token is necessary to create a +repository. + +Note that if you don't already have an account on Hugging Face, you need to +create one to get a token. It's free. 
+ +### Create a new repository + +Once all the required fields are filled, click on this button to create the +repository. Depending on the size, it may take a couple of seconds to finish. +Once it is created, you will see a success notification that includes the link +to the repository. Congratulations, you're done! + +## Troubleshooting + +### Not all skops features available + +This app is based on the [skops model card +feature](https://skops.readthedocs.io/en/stable/model_card.html#model-card-content). +However, it does not support all the options that are available there. If you +want to use all those options in a programmatic fashion, please follow the link +and read up on what it takes to create a model card with skops. The full power +of the `Card` class is documented +[here](https://skops.readthedocs.io/en/stable/modules/classes.html#skops.card.Card). + +### Strange behavior + +If the app behaves strangely, shows error messages, or renders incorrectly, it +may be necessary to refresh the browser tab. This will take you back to the +start page, with all progress being lost. If you can reproduce that behavior, +please [create an issue](https://github.com/skops-dev/skops/issues) and let us +know. + +### Contact + +If you want to contact us, you can join our discord channel. To do that, follow +[these +instructions](https://skops.readthedocs.io/en/stable/community.html#discord). diff --git a/spaces/skops_model_card_creator/__init__.py b/spaces/skops_model_card_creator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/spaces/skops_model_card_creator/app.py b/spaces/skops_model_card_creator/app.py new file mode 100644 index 00000000..446ddd62 --- /dev/null +++ b/spaces/skops_model_card_creator/app.py @@ -0,0 +1,53 @@ +"""The app.py used with streamlit + +This ties together the different parts of the app.
class Screen:
    """Track which page of the app is currently displayed.

    One instance is stored in ``st.session_state.screen``. The dispatch at the
    bottom of this module reads ``state`` on every run and renders the matching
    page.
    """

    # "help" is dispatched to ``help_page`` below, so it has to be listed next
    # to the other screens for the annotation to describe all valid values.
    state: Literal["start", "help", "edit", "create_repo"] = "start"
def _save_model_card(path: Path) -> None:
    """Write the session's rendered model card to ``path / "README.md"``.

    Nothing is written when the session holds no (truthy) model card.

    Parameters
    ----------
    path : Path
        Directory that receives the ``README.md`` file.
    """
    model_card = st.session_state.get("model_card")
    if not model_card:
        return

    # do not use model_card.save, see doc of get_rendered_model_card
    rendered = get_rendered_model_card(
        model_card, hf_path=str(st.session_state.hf_path)
    )
    # The rendered card may contain non-ASCII characters. Pin the encoding
    # instead of relying on the platform default, which is not UTF-8
    # everywhere and would make the write fail or produce a mis-encoded file.
    with open(path / "README.md", "w", encoding="utf-8") as f:
        f.write(rendered)
def _hf_token_field():
    """Render the text input for the user's Hugging Face token.

    The entered value is stored in ``st.session_state`` under the
    ``"hf_token"`` key.
    """
    tip = "The Hugging Face token can be found at https://hf.co/settings/token"
    st.text_input(
        "Enter your Hugging Face token ('hf_***')",
        key="hf_token",
        help=tip,
        # the token is a credential: mask it so that it is not shown in clear
        # text on screen
        type="password",
    )
def _update_model_card(
    model_card: card.Card,
    key: str,
    section_name: str,
    content: str,
) -> None:
    """Turn the submitted edit of one section into a task on the task state.

    The submitted title and content are read from the session state. If
    neither differs from the current section, nothing happens. Otherwise a
    task matching the kind of section (text or plot) is added to
    ``st.session_state.task_state``.

    Parameters
    ----------
    model_card : card.Card
        The model card being edited.
    key : str
        Key used to select the section; also the prefix of the widget keys.
    section_name : str
        Full name of the section before the edit.
    content : str
        Content of the section before the edit.
    """
    # The submitted values must be fetched from the session state, under the
    # same keys the form widgets use. Plain Python variables captured when the
    # form was built can be out of date.
    state = st.session_state
    submitted_title = state[f"{key}.title"]
    submitted_content = state[f"{key}.content"]

    # Only the last part of the section name is editable, so compare the split
    # name against itself with that last part swapped for the submitted title.
    name_parts = split_subsection_names(section_name)
    title_unchanged = name_parts == name_parts[:-1] + [submitted_title]

    # two "empty" contents count as equal even if they are not identical
    content_unchanged = content == submitted_content or not (
        content or submitted_content
    )
    if title_unchanged and content_unchanged:
        return

    section = model_card.select(key)
    if isinstance(section, PlotSection):
        if submitted_content:
            # a new figure was uploaded; it is stored next to the old one
            file_name = submitted_content.name.replace(" ", "_")
            new_path = state.hf_path / file_name
            task = UpdateFigureTask(
                model_card,
                key=key,
                old_name=section_name,
                new_name=submitted_title,
                data=submitted_content,
                new_path=new_path,
                old_path=new_path.parent / Path(section.path).name,
            )
        else:
            # no upload, so only the title changed
            task = UpdateFigureTitleTask(
                model_card,
                key=key,
                old_name=section_name,
                new_name=submitted_title,
            )
    else:
        # a normal (text) section
        task = UpdateSectionTask(
            model_card,
            key=key,
            old_name=section_name,
            new_name=submitted_title,
            old_content=content,
            new_content=submitted_content,
        )
    state.task_state.add(task)
def _add_section_form(
    model_card: card.Card, key: str, section_name: str, old_title: str, content: str
) -> None:
    """Render the edit form of a text section.

    The form shows the section name as header, a text input for the title and
    a text area for the content. Submitting it calls ``_update_model_card``.
    """
    # The widget keys put the submitted values into the session state, which is
    # where the callback reads them from.
    title_key = f"{key}.title"
    content_key = f"{key}.content"
    callback_args = (model_card, key, section_name, content)

    with st.form(key, clear_on_submit=False):
        st.header(section_name)
        st.text_input("Section name", value=old_title, key=title_key)
        st.text_area("Content", value=content, key=content_key)
        st.form_submit_button(
            "Update", on_click=_update_model_card, args=callback_args
        )
def reset_model_card() -> None:
    """Undo every task done so far, one at a time.

    Does nothing when the session holds no task state or no model card.
    """
    if "task_state" not in st.session_state:
        return
    if "model_card" not in st.session_state:
        # Nothing to reset. Deleting the key here would raise ``KeyError``,
        # since this branch is only reached when the key is absent.
        return

    # each undo() is expected to shrink done_list, which ends the loop
    while st.session_state.task_state.done_list:
        st.session_state.task_state.undo()
def add_create_repo_button():
    """Show the button that switches to the "create_repo" screen.

    The button is disabled while the session has no (truthy) model card.
    """

    def go_to_create_repo():
        st.session_state.screen.state = "create_repo"

    has_model_card = bool(st.session_state.get("model_card"))
    st.button(
        "Create Repo",
        on_click=go_to_create_repo,
        disabled=not has_model_card,
        help="Create a model repository on Hugging Face Hub",
    )
def _parse_metrics(metrics: str) -> dict[str, str | float]:
    """Parse the metrics text area into a ``{name: value}`` dict.

    The text is expected to hold one metric per line in the form
    ``name = value``. Whitespace around the name and the value is ignored.

    Parameters
    ----------
    metrics : str
        Raw content of the text area.

    Returns
    -------
    dict
        Metric names mapped to their value. A value is converted to ``float``
        when possible and kept as a (stripped) string otherwise. A line without
        ``=`` yields the whole line as name with an empty string as value.
        Lines without a name (blank lines, lines starting with ``=``) are
        skipped.
    """
    metrics_table: dict[str, str | float] = {}
    for raw_line in metrics.splitlines():
        name, _, raw_val = raw_line.partition("=")
        name = name.strip()
        if not name:
            # nothing sensible to record, e.g. an empty line between metrics
            continue

        raw_val = raw_val.strip()
        val: str | float
        try:
            # try to coerce to float but don't error if it fails
            val = float(raw_val)
        except ValueError:
            val = raw_val
        metrics_table[name] = val
    return metrics_table
'accuracy = 0.9'", + key="special_metrics_text", + ) + st.form_submit_button( + "Update", + on_click=_update_metrics, + ) + + +def edit_input_form(): + if "task_state" not in st.session_state: + st.session_state.task_state = TaskState() + + with st.sidebar: + # TOP ROW BUTTONS + display_edit_buttons() + + # SHOW SPECIAL FIELDS IF SKOPS TEMPLATE WAS USED + if st.session_state.get("model_card_type", "") == "skops": + display_skops_special_fields() + + # SHOW EDITABLE SECTIONS + if "model_card" in st.session_state: + display_sections(st.session_state.model_card) + + if "model_card" in st.session_state: + display_model_card(st.session_state.model_card) diff --git a/spaces/skops_model_card_creator/gethelp.py b/spaces/skops_model_card_creator/gethelp.py new file mode 100644 index 00000000..1f793f42 --- /dev/null +++ b/spaces/skops_model_card_creator/gethelp.py @@ -0,0 +1,329 @@ +import streamlit as st + + +def add_back_button(key): + def fn(): + st.session_state.screen.state = "start" + + st.button("Back", help="Get back to the start screen", on_click=fn, key=key) + + +help_md = """# Create a Hugging Face model repository for scikit learn models + +This page aims to provide a simple interface to use the +[`skops`](https://skops.readthedocs.io/) model card and HF Hub creation +utilities. + +Below, we will explain the steps involved to create your own model repository to +host your scikit-learn model: + +1. Prepare the model repository +2. Edit the model card +3. Create the model repository on Hugging Face Hub + +## Step 1: Prepare the model repository + +In this step, you do the necessary preparation work to create a [model +repository on Hugging Face Hub](https://huggingface.co/docs/hub/models). + +### Upload a model + +Here you should upload the sklearn model we want to present in the model +repository. The model should be stored either as a ``pickle`` file or it should +use the [secure skops persistence +format](https://skops.readthedocs.io/en/stable/persistence.html). 
Later, this +model will be uploaded to the model repository so that you can share it with +others. + +The uploaded model should be a scikit-learn model or a model that is compatible +with the sklearn API, e.g. using [XGBoost sklearn +wrapper](https://xgboost.readthedocs.io/en/stable/python/python_api.html#module-xgboost.sklearn) +when it's an XGBoost model. + +If you just want to test out the application and don't want to upload a model, a +dummy model will be used instead. + +### Upload input data + +It's possible to upload input data as a csv file. If that is done, the first few +rows of the input data will be used as sample data for the model, e.g. when +trying out the [inference API](https://huggingface.co/inference-api). + + +### Choose the task type + +Choose the type of task that the model is intended to solve. It can be either +classification or regression, with input data being either tabular in nature or +text. + +### Requirements + +This is the list of Python requirements needed to run the model. + +### Choose the model card template + +This is the final step and choosing one of the options will bring you to the +editing step. + +#### Create a new skops model card + +This is the recommended way of getting started. The skops model card template +prefills the model card with some [useful +contents](https://skops.readthedocs.io/en/stable/model_card.html#model-card-content) +that you probably want to have in most model cards. Don't worry: If you don't +like part of the content, you can always edit or delete it later. + +#### Create a new empty model card + +If you want to start the model card completely from scratch, that's also +possible by choosing this option. It will generate a completely empty model card +for you that you can fashion to your liking. + +#### Load existing model card from HF Hub + +If you want to use an existing model card and edit it, that's also possible. 
+Please enter the Hugging Face Hub repository ID here and the corresponding model +card will be loaded. The repo ID is typically something like `username/reponame`, +e.g. `openai/whisper-small`. Some models also omit the user name, e.g. `gpt2`. + +Note that when you choose an existing model card, a couple of files will be +downloaded, because they may be required to render the model card (e.g. images). +Therefore, depending on the repository, this step may take a bit. + +If you notice any problems when rendering the existing model card, please let us +know by [creating an issue](https://github.com/skops-dev/skops/issues). + +## Step 2: Edit the model card + +Before creating the model repository, it is crucial to ensure that the [model +card](https://huggingface.co/blog/model-cards) is edited to best represent the +model you're working on. This can be achieved in the editing step, which is +described in more detail below. + +### Editing sidebar + +In the left sidebar, you will be able to edit the model card, whereas the main +screen is reserved for rendering the model card so that you see what you will +get. We will start by describing the editing sidebar. + +Tip: You should increase the width of the side bar if it is too narrow for your +taste. + +#### Undo, redo & reset + +On top of the side bar, you have the option to undo, redo, and reset the last +operation you did. Say, you accidentally made a change, just press the `Undo` +button to undo this change. Similarly, if you want to undo your undo operation, +press the `Redo` button. Finally, if you press `Reset`, all your operations will +be undone (but don't worry if you click the button accidentally, you can redo +all of them if you want). + +#### Save, create repo & delete + +These buttons are intended for when you finished editing the model card. When +you click on `Save`, you will get the option to download the model card as a +markdown file.
+ +When clicking the `Create Repo` button, you will be taken to the next screen, +which offers you to create a model repository on Hugging Face Hub. This step +will be explained in more detail further below. + +Finally, you can click on `Delete` to completely discard all the changes you +made and be taken back to the start screen of the app. Be careful, any change +you made will be lost. It is thus advised to first save the model card before +pressing `Delete`. + +#### Edit a section + +Each section has its own form field, which allows you to make edits. Change the +name of the section or change the content (or both), then click `Update` to see +a preview of your change. As with all other operations, you can undo the change +by clicking on `Undo`. + +#### Delete a section + +Below the form field for editing the section, you will find a `Delete` button +(including the name of the section to make it clear which section it refers to). +If you click that button, the whole section, _including its subsections_, will +be deleted. Again, click on `Undo` if you accidentally deleted something that +you want to keep. + +#### Add section below + +If you click on this button, a new subsection will be created under the current +section. This will create a section with a dummy title and dummy content, which +you can then edit. + +Note that this will create a new _subsection_. If there are already existing +subsections in the current section, the new subsection will be created _below_ +those existing subsections. So the new subsection you create might not appear +exactly where you expect it to appear.
To illustrate this, assume that we have +the following sections and subsections: + +- Section A + - Subsection A.1 + - Subsection A.2 +- Section B + +If you create a new section below "Section A", it will be created on the same +level as, and below, "Subsection A.2", resulting in the following structure: + +- Section A + - Subsection A.1 + - Subsection A.2 + - NEW SUBSECTION +- Section B + +If you create a new section below the "Subsection A.1", you will actually create +a sub-subsection, resulting in the following structure instead: + +- Section A + - Subsection A.1 + - NEW SUB-SUBSECTION + - Subsection A.2 +- Section B + +Hopefully, this clarifies things. Unfortunately, there is no possibility (yet) +to re-order sections. + +#### Add figure below + +This button works quite similarly to adding a new section. The main difference +is that instead of having a text area to enter content, you will be asked to +upload an image file. By default, a dummy image will be shown in the preview. + +#### Add metrics (only skops template) + +If you have chosen the skops template, you will see an additional field called +`Add metrics` near the top of the side bar. Here you can choose metrics you want +to be shown in the model card, e.g. the accuracy the model achieved on a +specific dataset. Please enter one metric per line, with the metric name on the +left, then an `=` sign, and the value on the right, e.g. `accuracy on test set = +0.85`. + +After pressing `Update`, the metrics will be shown in a table in the section +`Model description/Evaluation Results`. You can always add or remove metrics +from this field later. If you want to delete this section completely, look for +its edit form further below and press `Delete`. There, you can also edit the +table in a more fine grained way, e.g. by changing the alignment. + +If you don't use the skops template and still want to add a table, it is +possible to do that, but it requires a bit more work.
Add a new section as +described above, then, in the text area, create a table using the [markdown +table +syntax](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-tables#creating-a-table). + +### Model card visualization + +The main part of the page will show you what the final model card will look +like. + +#### Metadata + +On the very top, you can see the metadata of the model card (it is collapsed by +default). The metadata can be very useful for features on the HF Hub, e.g. +allowing other users to find your model by a given tag. + +Right now, it is not possible to edit the metadata directly from here. But don't +worry, once you have created the model card repository, you can easily edit the +metadata there. + +#### Table of Contents + +For your convenience, a table of contents is also shown at the top (collapsed by +default). This is useful if you have a bigger model card and want to see the +overview of all its contents. + +#### Markdown preview + +Finally, the model card itself is shown. This is how the model card will look +like once it is saved as markdown and then rendered. + +## Step 3: Creating a model repository + +After you have finished editing the model card, it is time to create a model +repository on Hugging Face Hub. Click on `Create Repo` and you will be taken to +the final step of the process. + +### Back & Delete + +If you find yourself wanting to make more edits to the model card, just click on +the `Back` button and you'll be brought back to the editing step. + +You can also click `Delete`, which will discard all your changes and bring you +back to the start page. Be careful: This step cannot be undone and all your +progress will be lost. + +### Files included in the repository + +For your convenience, this will show a preview of all the files included in the +repository, as well as their sizes. 
Don't create a repository if you see files +there that you don't want to be uploaded. + +### Privacy settings + +By default, a private repository will be created. If you untick this box, it +will be public instead. More information on what that implies can be found in +the [docs on repository +settings](https://huggingface.co/docs/hub/repositories-settings). + +### Name of the repository + +Here you have to enter the name of the repository. Typically, that's something +like `username/reponame` or `organame/reponame`. This field is mandatory and you +should ensure that the corresponding repository ID does not exist yet. + +### Enter your Hugging Face token + +Here you need to paste your Hugging Face token, which is used for +authentication. The token can be found [here](https://hf.co/settings/token) and +it always starts with "hf_". Entering a token is necessary to create a +repository. + +Note that if you don't already have an account on Hugging Face, you need to +create one to get a token. It's free. + +### Create a new repository + +Once all the required fields are filled, click on this button to create the +repository. Depending on the size, it may take a couple of seconds to finish. +Once it is created, you will see a success notification that includes the link +to the repository. Congratulations, you're done! + +## Troubleshooting + +### Not all skops features available + +This app is based on the [skops model card +feature](https://skops.readthedocs.io/en/stable/model_card.html#model-card-content). +However, it does not support all the options that are available there. If you +want to use all those options in a programmatic fashion, please follow the link +and read up on what it takes to create a model card with skops. The full power +of the `Card` class is documented +[here](https://skops.readthedocs.io/en/stable/modules/classes.html#skops.card.Card). 
+ +### Strange behavior + +If the app behaves strangely, shows error messages, or renders incorrectly, it +may be necessary to refresh the browser tab. This will take you back to the +start page, with all progress being lost. If you can reproduce that behavior, +please [creating an issue](https://github.com/skops-dev/skops/issues) and let us +know. + +### Contact + +If you want to contact us, you can join our discord channel. To do that, follow +[these +instructions](https://skops.readthedocs.io/en/stable/community.html#discord). +""" + + +def add_help_content(): + # This is the exact same text as in the README.md of this space + st.markdown(help_md) + + +def help_page(): + add_back_button(key="help_get_back") + add_help_content() + add_back_button(key="help_get_back2") # names must be unique diff --git a/spaces/skops_model_card_creator/make-data.py b/spaces/skops_model_card_creator/make-data.py new file mode 100644 index 00000000..1db69efb --- /dev/null +++ b/spaces/skops_model_card_creator/make-data.py @@ -0,0 +1,30 @@ +# companion script to the space creator +# generates the logreg.pkl and logreg.skops file, as well as data.csv + +import pickle + +import pandas as pd +from sklearn.datasets import make_classification +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler + +import skops.io as sio + +X, y = make_classification() +df = pd.DataFrame(X) + +clf = Pipeline( + [ + ("scale", StandardScaler()), + ("clf", LogisticRegression(random_state=0)), + ] +) +clf.fit(X, y) + +with open("logreg.pkl", "wb") as f: + pickle.dump(clf, f) +sio.dump(clf, "logreg.skops") + + +df.to_csv("data.csv", index=False) diff --git a/spaces/skops_model_card_creator/packages.txt b/spaces/skops_model_card_creator/packages.txt new file mode 100644 index 00000000..4a59b54c --- /dev/null +++ b/spaces/skops_model_card_creator/packages.txt @@ -0,0 +1 @@ +pandoc diff --git 
a/spaces/skops_model_card_creator/requirements.txt b/spaces/skops_model_card_creator/requirements.txt new file mode 100644 index 00000000..e1747269 --- /dev/null +++ b/spaces/skops_model_card_creator/requirements.txt @@ -0,0 +1,7 @@ +catboost +huggingface_hub +lightgbm +pandas +scikit-learn +xgboost +git+https://github.com/skops-dev/skops.git diff --git a/spaces/skops_model_card_creator/start.py b/spaces/skops_model_card_creator/start.py new file mode 100644 index 00000000..523591d9 --- /dev/null +++ b/spaces/skops_model_card_creator/start.py @@ -0,0 +1,251 @@ +"""Start page of the app + +This page is used to initialize a model card that is either: + +1. based on the skops template +2. empty +3. loads an existing model card + +Optionally, users can add a model file, data, requirements, and choose a task. + +""" + +import glob +import io +import os +import pickle +import shutil +from pathlib import Path +from tempfile import mkdtemp + +import pandas as pd +import sklearn +import streamlit as st +from huggingface_hub import snapshot_download +from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError +from sklearn.base import BaseEstimator +from sklearn.dummy import DummyClassifier + +import skops.io as sio +from skops import card, hub_utils + +tmp_path = Path(mkdtemp(prefix="skops-")) # temporary files +description = """Create a Hugging Face model repository for scikit learn models + +This page aims to provide a simple interface to use the +[`skops`](https://skops.readthedocs.io/) model card and HF Hub creation +utilities. 
def _clear_repo(path: str) -> None:
    """Remove everything inside ``path`` but keep the directory itself.

    Regular files and symlinks are unlinked, real directories are removed
    recursively. Hidden entries (names starting with a dot) are removed as
    well, so the directory is empty afterwards. A ``path`` that does not exist
    is silently ignored.

    Parameters
    ----------
    path : str
        Directory whose content should be deleted.
    """
    # escape the directory part so that characters like "[" in the path are
    # not interpreted as glob syntax
    root = glob.escape(str(path))
    # "*" does not match dot-entries, they need a pattern of their own
    entries = glob.glob(os.path.join(root, "*")) + glob.glob(os.path.join(root, ".*"))
    for entry in entries:
        if os.path.isfile(entry) or os.path.islink(entry):
            # symlinks are unlinked, never followed, even if they point to a dir
            os.unlink(entry)
        elif os.path.isdir(entry):
            shutil.rmtree(entry)
def create_hf_model_card() -> None:
    """Load an existing model card from the HF Hub and switch to the edit screen.

    The repo ID is read from the ``hf_repo_id`` session entry; surrounding
    whitespace and quotes are stripped. Only files that may be needed to render
    the card (markdown, text and common image formats) are downloaded. They are
    copied into the session's ``hf_path`` and into the working directory, then
    ``README.md`` is parsed into a model card.

    Does nothing when no repo ID was entered. If the repo ID is invalid or the
    repository does not exist, an error is shown and the screen is left
    unchanged.
    """
    repo_id = st.session_state.get("hf_repo_id", "").strip().strip("'").strip('"')
    if not repo_id:
        return

    # glob-style patterns; every entry needs its wildcard, a bare ".txt" would
    # only match a file that is literally called ".txt"
    allow_patterns = [
        "*.md",
        "*.txt",
        "*.png",
        "*.gif",
        "*.jpg",
        "*.jpeg",
        "*.bmp",
        "*.webp",
    ]
    try:
        path = snapshot_download(repo_id, allow_patterns=allow_patterns)
    except (HFValidationError, RepositoryNotFoundError):
        st.error(
            f"Repository '{repo_id}' could not be found on HF Hub, "
            "please check that the repo ID is correct."
        )
        return

    # move everything to the hf_path and working dir
    hf_path = st.session_state.hf_path
    shutil.copytree(path, hf_path, dirs_exist_ok=True)
    shutil.copytree(path, ".", dirs_exist_ok=True)

    model_card = card.parse_modelcard(hf_path / "README.md")
    st.session_state.model_card = model_card
    st.session_state.model_card_type = "loaded"
    st.session_state.screen.state = "edit"
+ ) + + if not st.session_state.get("model_file"): + st.file_uploader( + "Upload an sklearn model (pickle or skops format)", + on_change=load_model, + key="model_file", + ) + + st.markdown("---") + + st.text( + "Upload samples from your data (in csv format)\n" + "This sample data can be attached to the metadata of the model card" + ) + st.file_uploader( + "Upload input data (csv)", type=["csv"], on_change=load_data, key="data_file" + ) + st.markdown("---") + + st.selectbox( + label="Choose the task type", + options=[ + "tabular-classification", + "tabular-regression", + "text-classification", + "text-regression", + ], + key="task", + on_change=init_repo, + ) + st.markdown("---") + + st.text_area( + label="Requirements", + value=f"scikit-learn=={sklearn.__version__}\n", + key="requirements", + on_change=init_repo, + ) + st.markdown("---") + + st.markdown("Choose one of the options below to get started:") + col_0, col_1, col_2 = st.columns([2, 2, 2]) + with col_0: + st.button("Create a new skops model card", on_click=create_skops_model_card) + + with col_1: + st.button("Create a new empty model card", on_click=create_empty_model_card) + + with col_2: + with st.form("Load existing model card from HF Hub", clear_on_submit=False): + st.markdown("Load existing model card from HF Hub") + st.text_input("Repo name (e.g. 'gpt2')", key="hf_repo_id") + st.form_submit_button("Load", on_click=create_hf_model_card) + + +start_input_form() diff --git a/spaces/skops_model_card_creator/tasks.py b/spaces/skops_model_card_creator/tasks.py new file mode 100644 index 00000000..b6238d0c --- /dev/null +++ b/spaces/skops_model_card_creator/tasks.py @@ -0,0 +1,298 @@ +"""Functionality around tasks + +Tasks are used to implement "undo" and "redo" functionality. 
class TaskState:
    """Undo/redo bookkeeping for tasks.

    ``done_list`` holds the tasks that are currently applied (oldest first),
    ``undone_list`` the ones that were undone and can still be redone.
    """

    def __init__(self) -> None:
        self.done_list: list[Task] = []
        self.undone_list: list[Task] = []

    def undo(self) -> None:
        """Revert the most recent task, if there is one."""
        if self.done_list:
            latest = self.done_list.pop()
            latest.undo()
            self.undone_list.append(latest)

    def redo(self) -> None:
        """Re-apply the most recently undone task, if there is one."""
        if self.undone_list:
            latest = self.undone_list.pop()
            latest.do()
            self.done_list.append(latest)

    def add(self, task: Task) -> None:
        """Execute a new task and record it; this drops the redo history."""
        task.do()
        self.done_list.append(task)
        del self.undone_list[:]

    def reset(self) -> None:
        """Forget all recorded tasks without undoing them."""
        del self.done_list[:]
        del self.undone_list[:]
class DeleteSectionTask(Task):
    """Delete a section

    The section is not completely removed from the underlying data structure,
    but only turned invisible, which makes the operation easy to undo.

    If the section owns a file (``path`` is given), the file is moved to a
    temporary location while the section is deleted and moved back on undo.

    Parameters
    ----------
    model_card : skops.card.Card
        The model card that contains the section.

    key : str
        Key used to select the section on the model card.

    path : Path or None
        File that belongs to the section, or ``None`` if there is no file.
    """

    def __init__(
        self,
        model_card: card.Card,
        key: str,
        path: Path | None,
    ) -> None:
        self.model_card = model_card
        self.key = key
        # when 'deleting' a file, move it to a temp file
        self.path = path
        # only create the temporary directory when there is a file to park;
        # otherwise every deleted text section would leave an empty directory
        # behind
        self.tmp_path: Path | None = None
        if path:
            self.tmp_path = Path(mkdtemp(prefix="skops-")) / str(uuid4())

    def do(self) -> None:
        self.model_card.select(self.key).visible = False
        if self.path:
            shutil.move(self.path, self.tmp_path)

    def undo(self) -> None:
        self.model_card.select(self.key).visible = True
        if self.path:
            shutil.move(self.tmp_path, self.path)
class UpdateFigureTitleTask(Task):
    """Rename a figure section without touching its image.

    ``old_name`` and ``new_name`` may be full section paths; only their last
    component is used as the section title.
    """

    def __init__(
        self,
        model_card: card.Card,
        key: str,
        old_name: str,
        new_name: str,
    ) -> None:
        self.model_card = model_card
        self.key = key
        self.old_name = old_name
        self.new_name = new_name

    def _retitle(self, name: str) -> str:
        # look up the section first, then derive the title from the last
        # component of the (possibly nested) section name
        target = self.model_card.select(self.key)
        leaf = split_subsection_names(name)[-1]
        target.title = leaf
        return leaf

    def do(self) -> None:
        # the applied title is also remembered on the task itself
        self.title = self._retitle(self.new_name)

    def undo(self) -> None:
        self._retitle(self.old_name)
class AddMetricsTask(Task):
    """Replace the metrics of the model card with a new set of metrics.

    A copy of the metrics present at construction time is kept so that the
    change can be undone.
    """

    def __init__(
        self,
        model_card: card.Card,
        metrics: dict[str, str | int | float],
    ) -> None:
        self.model_card = model_card
        self.old_metrics = model_card._metrics.copy()
        self.new_metrics = metrics

    def _replace_metrics(self, metrics: dict[str, str | int | float]) -> None:
        # wipe the current metrics before adding, so the result is exactly
        # ``metrics`` and not a merge with what was there before
        self.model_card._metrics.clear()
        self.model_card.add_metrics(**metrics)

    def do(self) -> None:
        self._replace_metrics(self.new_metrics)

    def undo(self) -> None:
        self._replace_metrics(self.old_metrics)
# Matches a markdown image such as ![Test image](images/test.png "Alternate text").
# Groups, in the order ``findall`` returns them: the whole image markdown, the
# text in square brackets, the path, and whatever follows the path inside the
# parentheses (e.g. a quoted title).
PAT_MD_IMG = re.compile(
    r'(!\[(?P<image_title>[^\]]+)\]\((?P<image_path>[^\)"\s]+)\s*([^\)]*)\))'
)


def get_rendered_model_card(model_card: card.Card, hf_path: str) -> str:
    """Render the model card and strip the temporary directory from all paths.

    Parameters
    ----------
    model_card : skops.card.Card
        The model card to render.

    hf_path : str
        Directory in which the files of the model card are stored.

    Returns
    -------
    str
        The rendered model card, with every occurrence of ``hf_path``
        (including the trailing path separator) removed.
    """
    # This is a bit hacky:
    # As a space, the model card is created in a temporary hf_path directory,
    # which is where all the files are put. So e.g. if a figure is added, it is
    # found at /tmp/skops-jtyqdgk3/fig.png. However, when the model card is
    # actually used, we don't want that, since there, the files will be in the
    # cwd. Therefore, we remove the tmp directory everywhere we find it in the
    # file.
    if not hf_path.endswith(os.path.sep):
        hf_path += os.path.sep

    rendered = model_card.render()
    rendered = rendered.replace(hf_path, "")
    return rendered


def process_card_for_rendering(rendered: str) -> tuple[str, str]:
    """Split a rendered model card into metadata and displayable markdown.

    Parameters
    ----------
    rendered : str
        The rendered model card, starting with a metadata block that is
        enclosed in ``---`` lines.

    Returns
    -------
    metadata : str
        The text between the opening and the closing ``---``.

    rendered_with_img : str
        Everything after the closing ``---``, with markdown images replaced by
        ``<img>`` tags that embed the image as base64 data.

    Raises
    ------
    ValueError
        If no closing ``---`` line is found.
    """
    # search from index 1 so that the opening "---" is not found
    idx = rendered[1:].index("\n---") + 1
    metadata = rendered[3:idx]
    rendered = rendered[idx + 4 :]  # noqa: E203

    # below is a hack to display the images in streamlit
    # https://discuss.streamlit.io/t/image-in-markdown/13274/10
    # The problem is that streamlit does not display images in markdown, so we
    # need to replace them with html. However, we only want that in the rendered
    # markdown, not in the card that is produced for the hub
    def markdown_images(markdown):
        # example image markdown:
        # ![Test image](images/test.png "Alternate text")
        images = PAT_MD_IMG.findall(markdown)
        return images

    def img_to_bytes(img_path):
        img_bytes = Path(img_path).read_bytes()
        encoded = base64.b64encode(img_bytes).decode()
        return encoded

    def img_to_html(img_path, img_alt):
        # the file extension is used as the image subtype of the data URI
        img_format = img_path.split(".")[-1]
        img_html = (
            f'<img src="data:image/{img_format.lower()};'
            f'base64,{img_to_bytes(img_path)}" '
            f'alt="{img_alt}" style="max-width: 100%;">'
        )
        return img_html

    def markdown_insert_images(markdown):
        images = markdown_images(markdown)

        for image in images:
            image_markdown = image[0]
            image_alt = image[1]
            image_path = image[2]
            markdown = markdown.replace(
                image_markdown, img_to_html(image_path, image_alt)
            )
        return markdown

    rendered_with_img = markdown_insert_images(rendered)
    return metadata, rendered_with_img


def iterate_key_section_content(
    data: dict[str, Section],
    parent_section: str = "",
    parent_keys: list[str] | None = None,
) -> Iterator[tuple[str, str]]:
    """Walk all visible sections depth-first and yield their key and title.

    Parameters
    ----------
    data : dict of str to Section
        The sections to iterate over.

    parent_section : str (default="")
        Title path of the parent section, components separated by ``/``.

    parent_keys : list of str or None (default=None)
        Keys of all parent sections, outermost first.

    Yields
    ------
    key : str
        The ``/``-joined keys leading to the section.

    title : str
        The ``/``-joined titles leading to the section.

    Notes
    -----
    Invisible sections are skipped together with all their subsections.
    """
    parent_keys = parent_keys or []

    for key, val in data.items():
        if not val.visible:
            continue

        if parent_section:
            title = "/".join((parent_section, val.title))
        else:
            title = val.title

        return_key = key if not parent_keys else "/".join(parent_keys + [key])
        yield return_key, title

        if val.subsections:
            yield from iterate_key_section_content(
                val.subsections,
                parent_section=title,
                parent_keys=parent_keys + [key],
            )