diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index d9861f289b1..eb74dea852b 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -484,6 +484,13 @@ cdef class RowGroupMetaData:
         return True
 
     def column(self, int i):
+        """
+        Return a ColumnChunkMetaData initialized for column index i.
+
+        Raises IndexError if i is negative or not less than num_columns.
+        """
+        if i < 0 or i >= self.num_columns:
+            raise IndexError('{0} out of bounds'.format(i))
         chunk = ColumnChunkMetaData()
         chunk.init(deref(self.metadata), i)
         return chunk
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 3f7f4fd50ab..ca3fbc4c816 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -659,11 +659,24 @@ def test_parquet_metadata_api():
             assert isinstance(col_meta, pq.ColumnChunkMetaData)
             repr(col_meta)
 
+    with pytest.raises(IndexError):
+        meta.row_group(-1)
+
+    with pytest.raises(IndexError):
+        meta.row_group(meta.num_row_groups + 1)
+
     rg_meta = meta.row_group(0)
     assert rg_meta.num_rows == len(df)
     assert rg_meta.num_columns == ncols + 1  # +1 for index
     assert rg_meta.total_byte_size > 0
 
+    with pytest.raises(IndexError):
+        rg_meta.column(-1)
+
+    # num_columns is the first invalid index; probe the exact boundary
+    with pytest.raises(IndexError):
+        rg_meta.column(rg_meta.num_columns)
+
     col_meta = rg_meta.column(0)
     assert col_meta.file_offset > 0
     assert col_meta.file_path == ''  # created from BytesIO