From 70993e0482ec23c6c33ee2cea68ad0dcf0ddaa85 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Mon, 5 Dec 2022 16:38:42 +0300 Subject: [PATCH 01/17] added format in config --- skops/hub_utils/_hf_hub.py | 27 ++++++++++++++++++++++++++- skops/hub_utils/tests/test_hf_hub.py | 8 ++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 2c2fd323..e07ae375 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -151,6 +151,11 @@ def _create_config( "text-regression", ], data, + save_format: Literal[ # type: ignore + "skops", + "pickle", + "auto", + ] = "auto", ) -> None: """Write the configuration into a ``config.json`` file. @@ -184,7 +189,9 @@ def _create_config( model. The first 3 input values are used as example inputs. - + save_format: str + The format used to persist the model. Can be ``"auto"``, ``"skops"`` + or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. Returns ------- None @@ -195,10 +202,19 @@ def _create_config( def recursively_default_dict() -> MutableMapping: return collections.defaultdict(recursively_default_dict) + if save_format == "auto": + extension = str(model_path).split(".")[-1] + if extension in ["pkl", "pickle", "joblib"]: + save_format = "pickle" + elif extension == "skops": + save_format = "skops" + else: + raise UserWarning("File format should be either skops or pickle!") config = recursively_default_dict() config["sklearn"]["model"]["file"] = str(model_path) config["sklearn"]["environment"] = requirements config["sklearn"]["task"] = task + config["sklearn"]["save_format"] = save_format if "tabular" in task: config["sklearn"]["example_input"] = _get_example_input(data) @@ -251,6 +267,11 @@ def init( "text-regression", ], data, + save_format: Literal[ # type: ignore + "skops", + "pickle", + "auto", + ] = "auto", ) -> None: """Initialize a scikit-learn based Hugging Face repo. @@ -291,6 +312,9 @@ def init( :class:`numpy.ndarray`. If ``task`` is ``"text-classification"`` or ``"text-regression"``, the data needs to be a ``list`` of strings. + save_format: str + The format used to persist the model. Can be ``"auto"``, ``"skops"`` + or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. Returns ------- None @@ -318,6 +342,7 @@ def init( dst=dst, task=task, data=data, + save_format=save_format, ) except Exception: shutil.rmtree(dst) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 5aa229d1..d1e0d578 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -74,11 +74,11 @@ def get_regressor(): return model -@pytest.fixture(scope="session") -def classifier_pickle(repo_path): +@pytest.fixture(scope="session", params=["model.skops", "model.pickle"]) +def classifier_pickle(repo_path, request): # Create a simple pickle file for the purpose of testing clf = get_classifier() - path = repo_path / "model.pickle" + path = repo_path / request.param try: with open(path, "wb") as f: @@ -96,7 +96,7 @@ def classifier_pickle(repo_path): } -@pytest.fixture(scope="session") +@pytest.fixture(scope="session", params=["skops", "pickle"]) def config_json(repo_path): path = repo_path / "config.json" try: From 36223e2835e822ef238080bc46e30dcc0139d177 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Wed, 7 Dec 2022 16:43:01 +0300 Subject: [PATCH 02/17] added tests --- skops/hub_utils/tests/test_hf_hub.py | 176 ++++++++++++++++----------- 1 file changed, 102 insertions(+), 74 deletions(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index d1e0d578..1d14da64 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -36,6 +36,7 @@ _validate_folder, ) from skops.hub_utils.tests.common import HF_HUB_TOKEN +from skops.io import dump from skops.utils.fixes import metadata, path_unlink iris = load_iris(as_frame=True, return_X_y=False) @@ -74,40 +75,54 @@ def get_regressor(): return model -@pytest.fixture(scope="session", params=["model.skops", "model.pickle"]) -def classifier_pickle(repo_path, request): - # Create a simple pickle file for the purpose of testing +@pytest.fixture(scope="session") +def classifier(repo_path, config_json): + # Create a simple model file for the purpose of testing clf = get_classifier() - path = repo_path / request.param + config_path, file_format = config_json + model_file = CONFIG[file_format]["sklearn"]["model"]["file"] + path = repo_path / model_file try: - with open(path, "wb") as f: - pickle.dump(clf, f) + if file_format == "pickle": + with open(path, "wb") as f: + pickle.dump(clf, f) + elif file_format == "skops": + dump(clf, path) yield path finally: path_unlink(path, missing_ok=True) CONFIG = { - "sklearn": { - "environment": ['scikit-learn="1.1.1"'], - "model": {"file": "model.pickle"}, - } + "pickle": { + "sklearn": { + "environment": ['scikit-learn="1.1.1"'], + "model": {"file": "model.pickle"}, + } + }, + "skops": { + "sklearn": { + "environment": ['scikit-learn="1.1.1"'], + "model": {"file": "model.skops"}, + } + }, } @pytest.fixture(scope="session", params=["skops", "pickle"]) -def config_json(repo_path): +def config_json(repo_path, request): path = repo_path / "config.json" try: with open(path, "w") as f: - json.dump(CONFIG, f) - yield path + json.dump(CONFIG[request.param], f) + yield path, request.param finally: path_unlink(path, missing_ok=True) def test_validate_folder(config_json): + config_path, file_format = config_json _, file_path = tempfile.mkstemp() dir_path = tempfile.mkdtemp() with pytest.raises(TypeError, match="The given path is not a directory."): @@ -124,11 +139,12 @@ def test_validate_folder(config_json): ): _validate_folder(path=dir_path) - shutil.copy2(config_json, dir_path) - with pytest.raises(TypeError, match="Model file model.pickle does not exist."): + shutil.copy2(config_path, dir_path) + model_file = CONFIG[file_format]["sklearn"]["model"]["file"] + with pytest.raises(TypeError, match=f"Model file {model_file} does not exist."): _validate_folder(path=dir_path) - (Path(dir_path) / "model.pickle").touch() + (Path(dir_path) / model_file).touch() # this should now work w/o an error _validate_folder(path=dir_path) @@ -210,11 +226,11 @@ def test_create_config_invalid_text_data(temp_path): ) -def test_atomic_init(classifier_pickle, temp_path): +def test_atomic_init(classifier, temp_path): with pytest.raises(ValueError): # this fails since we're passing an invalid task. init( - model=classifier_pickle, + model=classifier, requirements=["scikit-learn"], dst=temp_path, task="tabular-classification", @@ -224,7 +240,7 @@ def test_atomic_init(classifier_pickle, temp_path): # this passes even though the above init has failed once, on the same # destination path. init( - model=classifier_pickle, + model=classifier, requirements=["scikit-learn"], dst=temp_path, task="tabular-classification", @@ -232,12 +248,12 @@ def test_atomic_init(classifier_pickle, temp_path): ) -def test_init_invalid_task(classifier_pickle, temp_path): +def test_init_invalid_task(classifier, temp_path): with pytest.raises( ValueError, match="Task invalid not supported. Supported tasks are" ): init( - model=classifier_pickle, + model=classifier, requirements=["scikit-learn"], dst=temp_path, task="invalid", @@ -245,14 +261,15 @@ def test_init_invalid_task(classifier_pickle, temp_path): ) -def test_init(classifier_pickle, config_json): +def test_init(classifier, config_json): + config_path, file_format = config_json # create a temp directory and delete it, we just need a unique name. dir_path = tempfile.mkdtemp() shutil.rmtree(dir_path) version = metadata.version("scikit-learn") init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=dir_path, task="tabular-classification", @@ -263,7 +280,7 @@ def test_init(classifier_pickle, config_json): # it should fail a second time since the folder is no longer empty. with pytest.raises(OSError, match="None-empty dst path already exists!"): init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=dir_path, task="tabular-classification", @@ -271,7 +288,8 @@ def test_init(classifier_pickle, config_json): ) -def test_init_no_warning_or_error(classifier_pickle, config_json): +def test_init_no_warning_or_error(classifier, config_json): + config_path, file_format = config_json # for the happy path, there should be no warning dir_path = tempfile.mkdtemp() shutil.rmtree(dir_path) @@ -280,7 +298,7 @@ def test_init_no_warning_or_error(classifier_pickle, config_json): with warnings.catch_warnings(): warnings.simplefilter("error") init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=dir_path, task="tabular-classification", @@ -289,6 +307,7 @@ def test_init_no_warning_or_error(classifier_pickle, config_json): def test_model_file_does_not_exist_raises(repo_path, config_json): + config_path, file_format = config_json # when the model file does not exist, raise an OSError model_path = repo_path / "foobar.pickle" dir_path = tempfile.mkdtemp() @@ -308,6 +327,7 @@ def test_model_file_does_not_exist_raises(repo_path, config_json): def test_init_empty_model_file_errors(repo_path, config_json): + config_path, file_format = config_json # when model file is empty, warn users model_path = Path(repo_path / "foobar.pickle") model_path.touch() @@ -336,14 +356,15 @@ def test_push_download( explicit_create, repo_path, destination_path, - classifier_pickle, + classifier, config_json, ): + config_path, file_format = config_json client = HfApi() version = metadata.version("scikit-learn") init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=destination_path, task="tabular-classification", @@ -373,7 +394,7 @@ def test_push_download( download(repo_id=repo_id, dst=destination_path, token=HF_HUB_TOKEN) files = client.list_repo_files(repo_id=repo_id, use_auth_token=HF_HUB_TOKEN) - for f_name in [classifier_pickle.name, config_json.name]: + for f_name in [classifier.name, config_path.name]: assert f_name in files try: @@ -408,6 +429,7 @@ def repo_path_for_inference(): ids=["classifier", "regressor"], ) def test_inference( + config_json, model_func, data, task, @@ -416,57 +438,63 @@ def test_inference( ): # test inference backend for classifier and regressor models. This test can # take a lot of time and be flaky. - client = HfApi() - - repo_path = repo_path_for_inference - model = model_func() - model_path = repo_path / "model.pickle" + config_path, file_format = config_json + if file_format == "pickle": + client = HfApi() + repo_path = repo_path_for_inference + model = model_func() + model_file = CONFIG[file_format]["sklearn"]["model"]["file"] + model_path = repo_path / model_file + if file_format == "pickle": + with open(model_path, "wb") as f: + pickle.dump(model, f) + elif file_format == "skops": + dump(model, model_path) - with open(model_path, "wb") as f: - pickle.dump(model, f) - - version = metadata.version("scikit-learn") - init( - model=model_path, - requirements=[f'scikit-learn="{version}"'], - dst=destination_path, - task=task, - data=data.data, - ) - - # a model card is needed for inference engine to work. - model_card = card.Card( - model, metadata=card.metadata_from_config(Path(destination_path)) - ) - model_card.save(Path(destination_path) / "README.md") - - user = client.whoami(token=HF_HUB_TOKEN)["name"] - repo_id = f"{user}/test-{uuid4()}" + version = metadata.version("scikit-learn") + init( + model=model_path, + requirements=[f'scikit-learn="{version}"'], + dst=destination_path, + task=task, + data=data.data, + ) - push( - repo_id=repo_id, - source=destination_path, - token=HF_HUB_TOKEN, - commit_message="test message", - create_remote=True, - # api-inference doesn't support private repos for community projects. - private=False, - ) + # a model card is needed for inference engine to work. + model_card = card.Card( + model, metadata=card.metadata_from_config(Path(destination_path)) + ) + model_card.save(Path(destination_path) / "README.md") + + user = client.whoami(token=HF_HUB_TOKEN)["name"] + repo_id = f"{user}/test-{uuid4()}" + + push( + repo_id=repo_id, + source=destination_path, + token=HF_HUB_TOKEN, + commit_message="test message", + create_remote=True, + # api-inference doesn't support private repos for community projects. + private=False, + ) - X_test = data.data.head(5) - y_pred = model.predict(X_test) - output = get_model_output(repo_id, data=X_test, token=HF_HUB_TOKEN) + X_test = data.data.head(5) + y_pred = model.predict(X_test) + output = get_model_output(repo_id, data=X_test, token=HF_HUB_TOKEN) - # cleanup - client.delete_repo(repo_id=repo_id, token=HF_HUB_TOKEN) - path_unlink(model_path, missing_ok=True) + # cleanup + client.delete_repo(repo_id=repo_id, token=HF_HUB_TOKEN) + path_unlink(model_path, missing_ok=True) - assert np.allclose(output, y_pred) + assert np.allclose(output, y_pred) -def test_get_config(repo_path): +def test_get_config(repo_path, config_json): + config_path, file_format = config_json config = get_config(repo_path) - assert config == CONFIG + + assert config == CONFIG[file_format] assert get_requirements(repo_path) == ['scikit-learn="1.1.1"'] @@ -528,14 +556,14 @@ def test_get_column_names_pandas_not_installed(pandas_not_installed): class TestAddFiles: @pytest.fixture - def init_path(self, classifier_pickle, config_json): + def init_path(self, classifier, config_json): # create temporary directory dir_path = tempfile.mkdtemp() shutil.rmtree(dir_path) version = metadata.version("scikit-learn") init( - model=classifier_pickle, + model=classifier, requirements=[f'scikit-learn="{version}"'], dst=dir_path, task="tabular-classification", From ece88ddb23f6ec0bf944787c210db97b1a75bc12 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Fri, 9 Dec 2022 13:10:36 +0300 Subject: [PATCH 03/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Adrin Jalali --- skops/hub_utils/_hf_hub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index e07ae375..e28d4e4a 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -191,7 +191,10 @@ def _create_config( The first 3 input values are used as example inputs. save_format: str The format used to persist the model. Can be ``"auto"``, ``"skops"`` - or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. + or ``"pickle"``. Defaults to ``"auto"``, which would mean: + + - ``"pickle"`` if the extension is one of ``{".pickle", ".pkl", ".joblib"}`` + - ``"skops"`` if the extension is ``".skops"`` Returns ------- None From ea6c5ae7ebec5f8b9c310f23c45a5a3d7fee6c22 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Fri, 9 Dec 2022 13:10:53 +0300 Subject: [PATCH 04/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Adrin Jalali --- skops/hub_utils/_hf_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index e28d4e4a..e8400aa6 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -151,7 +151,7 @@ def _create_config( "text-regression", ], data, - save_format: Literal[ # type: ignore + model_format: Literal[ # type: ignore "skops", "pickle", "auto", From a6ffc3d3e7c6c1f4d8e25cdf6bf8d4f2cad4f6c0 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Fri, 9 Dec 2022 13:11:12 +0300 Subject: [PATCH 05/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Adrin Jalali --- skops/hub_utils/_hf_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index e8400aa6..fee2c5aa 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -212,7 +212,7 @@ def recursively_default_dict() -> MutableMapping: elif extension == "skops": save_format = "skops" else: - raise UserWarning("File format should be either skops or pickle!") + raise ValueError("Cannot determine the input file format. Please indicate the format using the `model_format` argument.") config = recursively_default_dict() config["sklearn"]["model"]["file"] = str(model_path) config["sklearn"]["environment"] = requirements From 59c45a4949f7049e800bcd3fe42ad81dbb4453cc Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Fri, 9 Dec 2022 13:58:42 +0300 Subject: [PATCH 06/17] addressed comments --- skops/hub_utils/_hf_hub.py | 31 +++++++++++++++------------- skops/hub_utils/tests/test_hf_hub.py | 26 ++++++++++++++++------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index fee2c5aa..dc89a11e 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -189,10 +189,10 @@ def _create_config( model. The first 3 input values are used as example inputs. - save_format: str + model_format: str The format used to persist the model. Can be ``"auto"``, ``"skops"`` or ``"pickle"``. Defaults to ``"auto"``, which would mean: - + - ``"pickle"`` if the extension is one of ``{".pickle", ".pkl", ".joblib"}`` - ``"skops"`` if the extension is ``".skops"`` Returns @@ -205,19 +205,22 @@ def _create_config( def recursively_default_dict() -> MutableMapping: return collections.defaultdict(recursively_default_dict) - if save_format == "auto": - extension = str(model_path).split(".")[-1] - if extension in ["pkl", "pickle", "joblib"]: - save_format = "pickle" - elif extension == "skops": - save_format = "skops" - else: - raise ValueError("Cannot determine the input file format. Please indicate the format using the `model_format` argument.") + if model_format == "auto": + extension = Path(model_path).suffix + if extension in [".pkl", ".pickle", ".joblib"]: + model_format = "pickle" + elif extension == ".skops": + model_format = "skops" + if model_format not in ["skops", "pickle"]: + raise ValueError( + "Cannot determine the input file format. Please indicate the format using" + " the `model_format` argument." + ) config = recursively_default_dict() config["sklearn"]["model"]["file"] = str(model_path) config["sklearn"]["environment"] = requirements config["sklearn"]["task"] = task - config["sklearn"]["save_format"] = save_format + config["sklearn"]["model_format"] = model_format if "tabular" in task: config["sklearn"]["example_input"] = _get_example_input(data) @@ -270,7 +273,7 @@ def init( "text-regression", ], data, - save_format: Literal[ # type: ignore + model_format: Literal[ # type: ignore "skops", "pickle", "auto", @@ -315,7 +318,7 @@ def init( :class:`numpy.ndarray`. If ``task`` is ``"text-classification"`` or ``"text-regression"``, the data needs to be a ``list`` of strings. - save_format: str + model_format: str The format used to persist the model. Can be ``"auto"``, ``"skops"`` or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. Returns @@ -345,7 +348,7 @@ def init( dst=dst, task=task, data=data, - save_format=save_format, + model_format=model_format, ) except Exception: shutil.rmtree(dst) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 1d14da64..3d322ffc 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -121,6 +121,18 @@ def config_json(repo_path, request): path_unlink(path, missing_ok=True) +def test_validate_format(classifier, temp_path): + with pytest.raises(ValueError, match="Cannot determine the input file*"): + init( + model=classifier, + requirements=["scikit-learn"], + dst=temp_path, + task="tabular-classification", + data=iris.data, + model_format="dummy", + ) + + def test_validate_folder(config_json): config_path, file_format = config_json _, file_path = tempfile.mkstemp() @@ -439,17 +451,17 @@ def test_inference( # test inference backend for classifier and regressor models. This test can # take a lot of time and be flaky. config_path, file_format = config_json - if file_format == "pickle": + if file_format != "pickle": + return + else: client = HfApi() repo_path = repo_path_for_inference - model = model_func() model_file = CONFIG[file_format]["sklearn"]["model"]["file"] + model = model_func() model_path = repo_path / model_file - if file_format == "pickle": - with open(model_path, "wb") as f: - pickle.dump(model, f) - elif file_format == "skops": - dump(model, model_path) + + with open(model_path, "wb") as f: + pickle.dump(model, f) version = metadata.version("scikit-learn") init( From 1c2eaf13e95ab27cf3f27308e3f585de5713506a Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Fri, 9 Dec 2022 17:39:01 +0300 Subject: [PATCH 07/17] [CI Inference] --- docs/changes.rst | 2 + skops/hub_utils/tests/test_hf_hub.py | 84 ++++++++++++++-------------- 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 034074a4..1a7cc038 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -51,6 +51,8 @@ v0.2 - Add :meth:`skops.hub_utils.get_model_output` to get the model's output using The Hugging Face Hub's inference API, and return an array with the outputs. :pr:`105` by `Adrin Jalali`_. +- Add `model_format` argument to :meth:`skops.hub_utils.init` to write it + as a section to `config.json`. :pr:`242`by `Merve Noyan`_. v0.1 ---- diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 3d322ffc..5aa6097e 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -452,54 +452,54 @@ def test_inference( # take a lot of time and be flaky. config_path, file_format = config_json if file_format != "pickle": - return - else: - client = HfApi() - repo_path = repo_path_for_inference - model_file = CONFIG[file_format]["sklearn"]["model"]["file"] - model = model_func() - model_path = repo_path / model_file + pytest.skip("Unsupported configuration for inference test.") - with open(model_path, "wb") as f: - pickle.dump(model, f) + client = HfApi() + repo_path = repo_path_for_inference + model_file = CONFIG[file_format]["sklearn"]["model"]["file"] + model = model_func() + model_path = repo_path / model_file - version = metadata.version("scikit-learn") - init( - model=model_path, - requirements=[f'scikit-learn="{version}"'], - dst=destination_path, - task=task, - data=data.data, - ) + with open(model_path, "wb") as f: + pickle.dump(model, f) - # a model card is needed for inference engine to work. - model_card = card.Card( - model, metadata=card.metadata_from_config(Path(destination_path)) - ) - model_card.save(Path(destination_path) / "README.md") - - user = client.whoami(token=HF_HUB_TOKEN)["name"] - repo_id = f"{user}/test-{uuid4()}" - - push( - repo_id=repo_id, - source=destination_path, - token=HF_HUB_TOKEN, - commit_message="test message", - create_remote=True, - # api-inference doesn't support private repos for community projects. - private=False, - ) + version = metadata.version("scikit-learn") + init( + model=model_path, + requirements=[f'scikit-learn="{version}"'], + dst=destination_path, + task=task, + data=data.data, + ) - X_test = data.data.head(5) - y_pred = model.predict(X_test) - output = get_model_output(repo_id, data=X_test, token=HF_HUB_TOKEN) + # TODO: remove when card init at repo init is merged + model_card = card.Card( + model, metadata=card.metadata_from_config(Path(destination_path)) + ) + model_card.save(Path(destination_path) / "README.md") - # cleanup - client.delete_repo(repo_id=repo_id, token=HF_HUB_TOKEN) - path_unlink(model_path, missing_ok=True) + user = client.whoami(token=HF_HUB_TOKEN)["name"] + repo_id = f"{user}/test-{uuid4()}" + + push( + repo_id=repo_id, + source=destination_path, + token=HF_HUB_TOKEN, + commit_message="test message", + create_remote=True, + # api-inference doesn't support private repos for community projects. + private=False, + ) + + X_test = data.data.head(5) + y_pred = model.predict(X_test) + output = get_model_output(repo_id, data=X_test, token=HF_HUB_TOKEN) + + # cleanup + client.delete_repo(repo_id=repo_id, token=HF_HUB_TOKEN) + path_unlink(model_path, missing_ok=True) - assert np.allclose(output, y_pred) + assert np.allclose(output, y_pred) def test_get_config(repo_path, config_json): From 9eb24bd02ec76bd81a3c8d1e49a7208c8b93ae48 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Fri, 9 Dec 2022 17:46:52 +0300 Subject: [PATCH 08/17] swapped fixture with mkdtemp --- skops/hub_utils/tests/test_hf_hub.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 5aa6097e..43311293 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -121,12 +121,14 @@ def config_json(repo_path, request): path_unlink(path, missing_ok=True) -def test_validate_format(classifier, temp_path): +def test_validate_format(classifier): + dir_path = tempfile.mkdtemp() + shutil.rmtree(dir_path) with pytest.raises(ValueError, match="Cannot determine the input file*"): init( model=classifier, requirements=["scikit-learn"], - dst=temp_path, + dst=dir_path, task="tabular-classification", data=iris.data, model_format="dummy", From d3e11e6a2795929ae080eaa84d61a0759dc47456 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Fri, 9 Dec 2022 20:33:58 +0300 Subject: [PATCH 09/17] Update skops/hub_utils/tests/test_hf_hub.py Co-authored-by: Adrin Jalali --- skops/hub_utils/tests/test_hf_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 43311293..367482ec 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -454,7 +454,7 @@ def test_inference( # take a lot of time and be flaky. config_path, file_format = config_json if file_format != "pickle": - pytest.skip("Unsupported configuration for inference test.") + pytest.skip(f"Inference only supports pickle at the moment. Given format: {file_format}") client = HfApi() repo_path = repo_path_for_inference From 589de5ceb24ac736cf0deabe01f24492468c4218 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Fri, 9 Dec 2022 20:49:37 +0300 Subject: [PATCH 10/17] changed changelog --- docs/changes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 1a7cc038..40a8ed9c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -14,6 +14,8 @@ v0.4 - :func:`.io.dump` and :func:`.io.load` now work with file like objects, which means you can use them with the ``with open(...) as f: dump(obj, f)`` pattern, like you'd do with ``pickle``. :pr:`234` by `Benjamin Bossan`_. +- Add `model_format` argument to :meth:`skops.hub_utils.init` to write it + as a section to `config.json`. :pr:`242`by `Merve Noyan`_. v0.3 ---- @@ -51,8 +53,6 @@ v0.2 - Add :meth:`skops.hub_utils.get_model_output` to get the model's output using The Hugging Face Hub's inference API, and return an array with the outputs. :pr:`105` by `Adrin Jalali`_. -- Add `model_format` argument to :meth:`skops.hub_utils.init` to write it - as a section to `config.json`. :pr:`242`by `Merve Noyan`_. v0.1 ---- From d4c1b89705811d50b58b55e377f7797b0d5f8d21 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Mon, 12 Dec 2022 12:12:25 +0300 Subject: [PATCH 11/17] black From 2ea068f193d53f0035852323034695bbb819e3a7 Mon Sep 17 00:00:00 2001 From: merveenoyan Date: Mon, 12 Dec 2022 12:12:51 +0300 Subject: [PATCH 12/17] black tests --- skops/hub_utils/tests/test_hf_hub.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 367482ec..ff2cf781 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -454,7 +454,9 @@ def test_inference( # take a lot of time and be flaky. config_path, file_format = config_json if file_format != "pickle": - pytest.skip(f"Inference only supports pickle at the moment. Given format: {file_format}") + pytest.skip( + f"Inference only supports pickle at the moment. Given format: {file_format}" + ) client = HfApi() repo_path = repo_path_for_inference From ed70096c547e83ab1eaba93c4091cce250ef1c52 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 12 Dec 2022 14:39:36 +0300 Subject: [PATCH 13/17] Update docs/changes.rst Co-authored-by: Adrin Jalali --- docs/changes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 40a8ed9c..4a5ab38c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -14,8 +14,9 @@ v0.4 - :func:`.io.dump` and :func:`.io.load` now work with file like objects, which means you can use them with the ``with open(...) as f: dump(obj, f)`` pattern, like you'd do with ``pickle``. :pr:`234` by `Benjamin Bossan`_. -- Add `model_format` argument to :meth:`skops.hub_utils.init` to write it - as a section to `config.json`. :pr:`242`by `Merve Noyan`_. +- Add `model_format` argument to :meth:`skops.hub_utils.init` to be stored in + `config.json` so that we know how to load a model from the repository. + :pr:`242` by `Merve Noyan`_. v0.3 ---- From 23a6c07533c65c7673154b10291485e6476a6f51 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 12 Dec 2022 18:09:23 +0300 Subject: [PATCH 14/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Benjamin Bossan --- skops/hub_utils/_hf_hub.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index dc89a11e..d940d836 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -195,6 +195,7 @@ def _create_config( - ``"pickle"`` if the extension is one of ``{".pickle", ".pkl", ".joblib"}`` - ``"skops"`` if the extension is ``".skops"`` + Returns ------- None From 9dda4e669423bde579f142d9ae4d249b927dcad8 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 12 Dec 2022 18:09:43 +0300 Subject: [PATCH 15/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Benjamin Bossan --- skops/hub_utils/_hf_hub.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index d940d836..aa49c75d 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -189,6 +189,7 @@ def _create_config( model. The first 3 input values are used as example inputs. + model_format: str The format used to persist the model. Can be ``"auto"``, ``"skops"`` or ``"pickle"``. Defaults to ``"auto"``, which would mean: From a0a5b45b999c10766657596fbf51974c402084ab Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 12 Dec 2022 18:09:52 +0300 Subject: [PATCH 16/17] Update skops/hub_utils/_hf_hub.py Co-authored-by: Benjamin Bossan --- skops/hub_utils/_hf_hub.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index aa49c75d..c494883e 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -323,6 +323,7 @@ def init( model_format: str The format used to persist the model. Can be ``"auto"``, ``"skops"`` or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. + Returns ------- None From 67a6de25eea703b591e2b38e2e1837796c1a3f28 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Tue, 13 Dec 2022 15:08:11 +0300 Subject: [PATCH 17/17] changed wording --- skops/hub_utils/_hf_hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index c494883e..67d982a9 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -321,7 +321,7 @@ def init( ``"text-regression"``, the data needs to be a ``list`` of strings. model_format: str - The format used to persist the model. Can be ``"auto"``, ``"skops"`` + The format the model was persisted in. Can be ``"auto"``, ``"skops"`` or ``"pickle"``. Defaults to ``"auto"`` that relies on file extension. Returns