From b05e7344ff386617895d19e4f00a800152f8a809 Mon Sep 17 00:00:00 2001
From: Marco Neumann
Date: Fri, 19 Jul 2019 13:08:30 +0200
Subject: [PATCH] add bounds check to RowGroupMetaData.column

---
Review notes (this section is ignored by git am):
- Line breaks were restored from a copy that had been collapsed onto a single
  line; the indentation of unchanged context lines is best-effort and should
  be checked against the target tree (or apply with --ignore-whitespace).
- The upper out-of-range column test probes the exact boundary
  (rg_meta.num_columns) rather than ncols + 2, so an off-by-one in the new
  check (> instead of >=) would be caught.

 python/pyarrow/_parquet.pyx          |  2 ++
 python/pyarrow/tests/test_parquet.py | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index d9861f289b1..eb74dea852b 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -484,6 +484,8 @@ cdef class RowGroupMetaData:
         return True
 
     def column(self, int i):
+        if i < 0 or i >= self.num_columns:
+            raise IndexError('{0} out of bounds'.format(i))
         chunk = ColumnChunkMetaData()
         chunk.init(deref(self.metadata), i)
         return chunk
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 3f7f4fd50ab..ca3fbc4c816 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -659,11 +659,23 @@ def test_parquet_metadata_api():
             assert isinstance(col_meta, pq.ColumnChunkMetaData)
             repr(col_meta)
 
+    with pytest.raises(IndexError):
+        meta.row_group(-1)
+
+    with pytest.raises(IndexError):
+        meta.row_group(meta.num_row_groups + 1)
+
     rg_meta = meta.row_group(0)
     assert rg_meta.num_rows == len(df)
     assert rg_meta.num_columns == ncols + 1  # +1 for index
     assert rg_meta.total_byte_size > 0
 
+    with pytest.raises(IndexError):
+        col_meta = rg_meta.column(-1)
+
+    with pytest.raises(IndexError):
+        col_meta = rg_meta.column(rg_meta.num_columns)
+
     col_meta = rg_meta.column(0)
     assert col_meta.file_offset > 0
     assert col_meta.file_path == ''  # created from BytesIO