From a967f90d2a976c9c3e2aca3f441165c61d1cb2a6 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Wed, 9 Nov 2022 01:49:57 +0300 Subject: [PATCH 1/6] Added support for Structured Arrays --- skops/hub_utils/_hf_hub.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 6d361972..aaf89906 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -135,7 +135,11 @@ def _get_column_names(data): # TODO: this is going to fail for Structured Arrays. We can add support for # them later if we see need for it. if isinstance(data, np.ndarray): - return [f"x{x}" for x in range(data.shape[1])] + if data.dtype.names: + return list(data.dtype.names) + return ( + [f"x{x}" for x in range(data.shape[1])] if len(data.shape) > 1 else ["x0"] + ) raise ValueError("The data is not a pandas.DataFrame or a numpy.ndarray.") From 98fd9bb9254640cd44b417c95caac05469adf1f9 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Fri, 11 Nov 2022 23:01:06 +0300 Subject: [PATCH 2/6] Added test for supported Structured Arrays --- skops/hub_utils/tests/test_hf_hub.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 590b0169..b6491788 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -506,7 +506,18 @@ def test_get_column_names(): expected_columns = [f"x{x}" for x in range(10)] assert _get_column_names(X_array) == expected_columns - expected_columns = [f"column{x}" for x in range(10)] + expected_columns = ["foo", "bar"] + X_array = np.array([(1, 2), (3, 4)], dtype=[("foo", "i8"), ("bar", "f4")]) + assert _get_column_names(X_array) == expected_columns + + expected_columns = ["f0", "f1", "f2"] # Default names + X_array = np.zeros(3, dtype="int8, float32, float64") + assert _get_column_names(X_array) == expected_columns + + expected_columns = ["x0"] + X_array 
= np.zeros(3) + assert _get_column_names(X_array) == expected_columns + X_df = pd.DataFrame(X_array, columns=expected_columns) assert _get_column_names(X_df) == expected_columns From 07b6a4046418c654a20c49eca5d72b3987978600 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Sat, 12 Nov 2022 12:24:50 +0300 Subject: [PATCH 3/6] Added me as a contributor --- docs/changes.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changes.rst b/docs/changes.rst index ed62c90e..89d86ee4 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -59,4 +59,5 @@ Contributors ~~~~~~~~~~~~ :user:`Adrin Jalali <adrinjalali>`, :user:`Merve Noyan <merveenoyan>`, -:user:`Benjamin Bossan <BenjaminBossan>`, :user:`Ayyuce Demirbas <ayyucedemirbas>` +:user:`Benjamin Bossan <BenjaminBossan>`, :user:`Ayyuce Demirbas <ayyucedemirbas>`, +:user:`Ali Osman Kaya <aliosmankaya>` From 40d9a63cb6655b3ab6ba78691284105721fb3e82 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Tue, 15 Nov 2022 23:47:56 +0300 Subject: [PATCH 4/6] Added Structured Arrays support to 'get_model_output' --- skops/hub_utils/_hf_hub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index aaf89906..9ac3fe95 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -643,7 +643,10 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An inputs = {"data": data.to_dict(orient="list")} except AttributeError: # the input is not a pandas DataFrame - inputs = {f"x{i}": data[:, i] for i in range(data.shape[1])} + inputs = { + col: (data[:, idx] if len(data.shape) > 1 else data[idx]) + for idx, col in enumerate(_get_column_names(data)) + } inputs = {"data": inputs} res = InferenceApi(repo_id=repo_id, task=model_info.pipeline_tag, token=token)( From f1a72969e2ec23c23757a8bae52ea80ceb1e4d45 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Sat, 26 Nov 2022 21:36:45 +0300 Subject: [PATCH 5/6] Updated _get_column_names to return standardized column names, added support to get_model_output for structured 
array --- skops/hub_utils/_hf_hub.py | 18 +++++++++++++----- skops/hub_utils/tests/test_hf_hub.py | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index 9ac3fe95..a0282611 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -136,7 +136,7 @@ def _get_column_names(data): # them later if we see need for it. if isinstance(data, np.ndarray): if data.dtype.names: - return list(data.dtype.names) + return [f"x{x}" for x in range(len(data.dtype.names))] return ( [f"x{x}" for x in range(data.shape[1])] if len(data.shape) > 1 else ["x0"] ) @@ -643,10 +643,18 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An inputs = {"data": data.to_dict(orient="list")} except AttributeError: # the input is not a pandas DataFrame - inputs = { - col: (data[:, idx] if len(data.shape) > 1 else data[idx]) - for idx, col in enumerate(_get_column_names(data)) - } + if data.dtype.names: + inputs = { + col: ( + [(x[0] if len(data.shape) > 1 else x) for x in data[idx].tolist()] + ) + for idx, col in enumerate(_get_column_names(data)) + } + else: + inputs = { + col: data[:, idx].tolist() + for idx, col in enumerate(_get_column_names(data)) + } inputs = {"data": inputs} res = InferenceApi(repo_id=repo_id, task=model_info.pipeline_tag, token=token)( diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index 226c907c..1f6ff378 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -508,11 +508,11 @@ def test_get_column_names(): expected_columns = [f"x{x}" for x in range(10)] assert _get_column_names(X_array) == expected_columns - expected_columns = ["foo", "bar"] + expected_columns = ["x0", "x1"] X_array = np.array([(1, 2), (3, 4)], dtype=[("foo", "i8"), ("bar", "f4")]) assert _get_column_names(X_array) == expected_columns - expected_columns = ["f0", "f1", "f2"] # Default names + 
expected_columns = ["x0", "x1", "x2"] # Default names X_array = np.zeros(3, dtype="int8, float32, float64") assert _get_column_names(X_array) == expected_columns From d219852ea50d725a330d311528fb7d700e0fddb7 Mon Sep 17 00:00:00 2001 From: aliosmankaya Date: Fri, 2 Dec 2022 01:30:51 +0300 Subject: [PATCH 6/6] Updated get_model_output for multi-dimensional arrays --- skops/hub_utils/_hf_hub.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index daf80519..be8d9f1b 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -646,14 +646,14 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An # the input is not a pandas DataFrame if data.dtype.names: inputs = { - col: ( - [(x[0] if len(data.shape) > 1 else x) for x in data[idx].tolist()] - ) + col: [x[0] for x in data[:, idx].tolist()] + if len(data.shape) > 1 + else [x[0] for x in data.tolist()] for idx, col in enumerate(_get_column_names(data)) } else: inputs = { - col: data[:, idx].tolist() + col: data[:, idx].tolist() if len(data.shape) > 1 else data.tolist() for idx, col in enumerate(_get_column_names(data)) } inputs = {"data": inputs}