From ed74d52249fabde739cf0599be0210c818b5d272 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Sep 2017 20:44:44 -0500 Subject: [PATCH 1/3] ARROW-1557 [Python] Validate names length in Table.from_arrays We now raise a ValueError when the length of the names doesn't match the length of the arrays. --- python/pyarrow/table.pxi | 3 +++ python/pyarrow/tests/test_table.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 68eb5cbdcac..d84c5a7aeb7 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -328,6 +328,9 @@ cdef int _schema_from_arrays( if names is None: raise ValueError('Must pass names when constructing ' 'from Array objects') + if len(names) != K: + raise ValueError("Length of names ({}) does not match " + "length of arrays ({})".format(len(names), K)) for i in range(K): val = arrays[i] if isinstance(val, (Array, ChunkedArray)): diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 4d5cb364c69..3a523e73c80 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -82,6 +82,18 @@ def test_recordbatch_basics(): batch[2] +def test_recordbatch_from_arrays_invalid_names(): + data = [ + pa.array(range(5)), + pa.array([-10, -5, 0, 5, 10]) + ] + with pytest.raises(ValueError): + pa.RecordBatch.from_arrays(data, names=['a', 'b', 'c']) + + with pytest.raises(ValueError): + pa.RecordBatch.from_arrays(data, names=['a']) + + def test_recordbatch_empty_metadata(): data = [ pa.array(range(5)), @@ -200,6 +212,18 @@ def test_table_basics(): assert chunk is not None +def test_table_from_arrays_invalid_names(): + data = [ + pa.array(range(5)), + pa.array([-10, -5, 0, 5, 10]) + ] + with pytest.raises(ValueError): + pa.Table.from_arrays(data, names=['a', 'b', 'c']) + + with pytest.raises(ValueError): + pa.Table.from_arrays(data, names=['a']) + + def test_table_add_column(): data = [ pa.array(range(5)), From 965a560867f45025dcbfe50c572593faa7d7cb33 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 20 Sep 2017 00:54:40 -0400 Subject: [PATCH 2/3] Fix test failure exposed in test_parquet.py Change-Id: Ie900ae1dac90e8e8326e22e950dfc7a50803dcd8 --- python/pyarrow/tests/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 9b5a4bcc43c..09e427fb37b 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -58,7 +58,7 @@ def test_single_pylist_column_roundtrip(tmpdir): filename = tmpdir.join('single_{}_column.parquet' .format(dtype.__name__)) data = [pa.array(list(map(dtype, range(5))))] - table = pa.Table.from_arrays(data, names=('a', 'b')) + table = pa.Table.from_arrays(data, names=['a']) _write_table(table, filename.strpath) table_read = _read_table(filename.strpath) for col_written, col_read in zip(table.itercolumns(), From 4df6f5935eb8c05f9ca147f6cda190c43ca97d2f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 20 Sep 2017 07:18:25 -0500 Subject: [PATCH 3/3] REF: avoid redundant len calculation --- python/pyarrow/table.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index d84c5a7aeb7..028797e45b8 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -315,7 +315,7 @@ cdef int _schema_from_arrays( fields.resize(K) - if len(arrays) == 0: + if not K: raise ValueError('Must pass at least one array') if isinstance(arrays[0], Column):