diff --git a/docs/changes.rst b/docs/changes.rst index f1aa7739..0ea29b5c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -82,5 +82,5 @@ Contributors ~~~~~~~~~~~~ :user:`Adrin Jalali `, :user:`Merve Noyan `, -:user:`Benjamin Bossan `, :user:`Ayyuce Demirbas -`, :user:`Prajjwal Mishra ` +:user:`Benjamin Bossan `, :user:`Ayyuce Demirbas `, +:user:`Prajjwal Mishra `, :user:`Ali Osman Kaya ` diff --git a/skops/hub_utils/_hf_hub.py b/skops/hub_utils/_hf_hub.py index d76e1de3..f6c223d2 100644 --- a/skops/hub_utils/_hf_hub.py +++ b/skops/hub_utils/_hf_hub.py @@ -132,7 +132,11 @@ def _get_column_names(data): # TODO: this is going to fail for Structured Arrays. We can add support for # them later if we see need for it. if isinstance(data, np.ndarray): - return [f"x{x}" for x in range(data.shape[1])] + if data.dtype.names: + return [f"x{x}" for x in range(len(data.dtype.names))] + return ( + [f"x{x}" for x in range(data.shape[1])] if len(data.shape) > 1 else ["x0"] + ) raise ValueError("The data is not a pandas.DataFrame or a numpy.ndarray.") @@ -672,7 +676,18 @@ def get_model_output(repo_id: str, data: Any, token: Optional[str] = None) -> An inputs = {"data": data.to_dict(orient="list")} except AttributeError: # the input is not a pandas DataFrame - inputs = {f"x{i}": data[:, i] for i in range(data.shape[1])} + if data.dtype.names: + inputs = { + col: [x[0] for x in data[:, idx].tolist()] + if len(data.shape) > 1 + else [x[0] for x in data.tolist()] + for idx, col in enumerate(_get_column_names(data)) + } + else: + inputs = { + col: data[:, idx].tolist() if len(data.shape) > 1 else data.tolist() + for idx, col in enumerate(_get_column_names(data)) + } inputs = {"data": inputs} res = InferenceApi(repo_id=repo_id, task=model_info.pipeline_tag, token=token)( diff --git a/skops/hub_utils/tests/test_hf_hub.py b/skops/hub_utils/tests/test_hf_hub.py index c4db9aee..231c547c 100644 --- a/skops/hub_utils/tests/test_hf_hub.py +++ b/skops/hub_utils/tests/test_hf_hub.py @@ -551,7 +551,18 @@ def test_get_column_names(): expected_columns = [f"x{x}" for x in range(10)] assert _get_column_names(X_array) == expected_columns - expected_columns = [f"column{x}" for x in range(10)] + expected_columns = ["x0", "x1"] + X_array = np.array([(1, 2), (3, 4)], dtype=[("foo", "i8"), ("bar", "f4")]) + assert _get_column_names(X_array) == expected_columns + + expected_columns = ["x0", "x1", "x2"] # Default names + X_array = np.zeros(3, dtype="int8, float32, float64") + assert _get_column_names(X_array) == expected_columns + + expected_columns = ["x0"] + X_array = np.zeros(3) + assert _get_column_names(X_array) == expected_columns + X_df = pd.DataFrame(X_array, columns=expected_columns) assert _get_column_names(X_df) == expected_columns