From c98461c8a4d5663469d0eddad457df793d4d0e07 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Mon, 19 Aug 2019 10:02:24 -0500
Subject: [PATCH] Add unit test exhibiting that CategoricalIndex survives roundtrip to Parquet format

---
Notes (reviewer commentary; this section is not part of the commit message):

  Test-only change. It adds test_categorical_index_survives_roundtrip,
  marked with pytest.mark.pandas, to python/pyarrow/tests/test_parquet.py.

  The test:
    1. builds a two-row DataFrame with columns c1 and c2, casts c1 to the
       'category' dtype and makes it the index;
    2. converts the frame with pa.Table.from_pandas and writes it with
       pq.write_table into a pa.BufferOutputStream (no file on disk);
    3. reads the buffer back with pq.read_pandas(...).to_pandas();
    4. asserts that the resulting index is a pd.CategoricalIndex and that
       it equals the original index.

  The in-code comment records the issue as ARROW-3652 and states that it
  was addressed by ARROW-3246.

 python/pyarrow/tests/test_parquet.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 12288bbda68..5ba2c74bf81 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -2985,6 +2985,21 @@ def test_parquet_file_too_small(tempdir):
         pq.read_table(path)
 
 
+@pytest.mark.pandas
+def test_categorical_index_survives_roundtrip():
+    # ARROW-3652, addressed by ARROW-3246
+    df = pd.DataFrame([['a', 'b'], ['c', 'd']], columns=['c1', 'c2'])
+    df['c1'] = df['c1'].astype('category')
+    df = df.set_index(['c1'])
+
+    table = pa.Table.from_pandas(df)
+    bos = pa.BufferOutputStream()
+    pq.write_table(table, bos)
+    ref_df = pq.read_pandas(bos.getvalue()).to_pandas()
+    assert isinstance(ref_df.index, pd.CategoricalIndex)
+    assert ref_df.index.equals(df.index)
+
+
 def test_dictionary_array_automatically_read():
     # ARROW-3246