From 15a22d543311874e27b0b76c6976cd98737e00b1 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 24 Aug 2020 16:34:59 -0400 Subject: [PATCH 1/6] TST: added new test --- python/pyarrow/tests/test_pandas.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 2d66a320481..e4057dcc505 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2447,19 +2447,15 @@ def test_category_zero_chunks(self): expected = pd.DataFrame({'a': expected}) tm.assert_frame_equal(result, expected) - def test_mixed_types_fails(self): - data = pd.DataFrame({'a': ['a', 1, 2.0]}) - with pytest.raises(pa.ArrowTypeError): - pa.Table.from_pandas(data) - - data = pd.DataFrame({'a': [1, True]}) - with pytest.raises(pa.ArrowTypeError): - pa.Table.from_pandas(data) - - data = pd.DataFrame({'a': ['a', 1, 2.0]}) - expected_msg = 'Conversion failed for column a' - with pytest.raises(pa.ArrowTypeError, match=expected_msg): - pa.Table.from_pandas(data) + @pytest.mark.parametrize( + "data,error_type", + [({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), ({"a": [1, True]}, pa.ArrowTypeError), ({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), ({"a": [1, "a"]}, pa.ArrowInvalid )], + ) + def test_mixed_types_fails(self, data, error_type): + expected_msg = "Conversion failed for column a" + df = pd.DataFrame(data) + with pytest.raises(error_type, match=expected_msg): + pa.Table.from_pandas(df) def test_strided_data_import(self): cases = [] @@ -3531,9 +3527,9 @@ def test_dictionary_from_pandas_specified_type(): assert result.type.equals(typ) assert result.to_pylist() == ['a', 'b'] - # mismatching values type -> raise error (for now a deprecation warning) + # mismatching values type -> raise error typ = pa.dictionary(index_type=pa.int8(), value_type=pa.int64()) - with pytest.warns(FutureWarning): + with pytest.raises(pa.ArrowInvalid): result = pa.array(cat, type=typ) assert result.to_pylist() == ['a', 'b'] From e59eed60c6f88579e075bd06298ca41180711564 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 24 Aug 2020 23:23:13 -0400 Subject: [PATCH 2/6] ValueConverter now raises ArrowInvalid if type int and cannot convert --- cpp/src/arrow/python/python_to_arrow.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index 949213f4bb2..964b3f2c8af 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -106,7 +106,12 @@ struct ValueConverter> { static inline Result FromPython(PyObject* obj) { ValueType value; - RETURN_NOT_OK(internal::CIntFromPython(obj, &value)); + arrow::Status s_ = internal::CIntFromPython(obj, &value); + if(!s_.ok() && !internal::PyIntScalar_Check(obj)){ + return internal::InvalidValue(obj, "tried to convert to int"); + } else { + RETURN_NOT_OK(s_); + } return value; } }; From 5985d49f8eab571bf8fbfcc6177375b95efbcec4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 25 Aug 2020 00:22:25 -0400 Subject: [PATCH 3/6] TST: rewrite existing tests --- python/pyarrow/tests/test_compute.py | 2 +- python/pyarrow/tests/test_convert_builtin.py | 7 ++----- python/pyarrow/tests/test_pandas.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 3c2e0865f8d..2fa00f4bd61 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -736,7 +736,7 @@ def test_is_null(): def test_fill_null(): arr = pa.array([1, 2, None, 4], type=pa.int8()) fill_value = pa.array([5], type=pa.int8()) - with pytest.raises(TypeError): + with pytest.raises(pa.ArrowInvalid): arr.fill_null(fill_value) arr = pa.array([None, None, None, None], type=pa.null()) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index f62a9414e1e..f1dc05428a2 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -382,11 +382,8 @@ def test_sequence_custom_integers(seq): @parametrize_with_iterable_types def test_broken_integers(seq): data = [MyBrokenInt()] - with pytest.raises(ZeroDivisionError) as exc_info: + with pytest.raises(pa.ArrowInvalid): pa.array(seq(data), type=pa.int64()) - # Original traceback is kept - tb_lines = traceback.format_tb(exc_info.tb) - assert "# MARKER" in tb_lines[-1] def test_numpy_scalars_mixed_type(): @@ -1643,7 +1640,7 @@ def test_map_from_dicts(): # Invalid dictionary types for entry in [[{'key': '1', 'value': 5}], [{'key': {'value': 2}}]]: - with pytest.raises(TypeError, match="integer is required"): + with pytest.raises(pa.ArrowInvalid, match="tried to convert to int"): pa.array([entry], type=pa.map_('i4', 'i4')) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index e4057dcc505..f31d1c2e76a 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2449,12 +2449,18 @@ def test_category_zero_chunks(self): @pytest.mark.parametrize( "data,error_type", - [({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), ({"a": [1, True]}, pa.ArrowTypeError), ({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), ({"a": [1, "a"]}, pa.ArrowInvalid )], + [ + ({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), + ({"a": ["a", 1, 2.0]}, pa.ArrowTypeError), + ({"a": [1, True]}, pa.ArrowTypeError), + ({"a": [True, "a"]}, pa.ArrowInvalid), + ({"a": [1, "a"]}, pa.ArrowInvalid), + ({"a": [1.0, "a"]}, pa.ArrowInvalid), + ], ) def test_mixed_types_fails(self, data, error_type): - expected_msg = "Conversion failed for column a" df = pd.DataFrame(data) - with pytest.raises(error_type, match=expected_msg): + with pytest.raises(error_type): pa.Table.from_pandas(df) def test_strided_data_import(self): @@ -3531,7 +3537,7 @@ def test_dictionary_from_pandas_specified_type(): typ = pa.dictionary(index_type=pa.int8(), value_type=pa.int64()) with pytest.raises(pa.ArrowInvalid): result = pa.array(cat, type=typ) - assert result.to_pylist() == ['a', 'b'] + assert result.to_pylist() == ["a", "b"] # mismatching order -> raise error (for now a deprecation warning) typ = pa.dictionary( From 80e0078d156dde392f2ce0d249790704d36b1e17 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 26 Aug 2020 00:53:31 -0400 Subject: [PATCH 4/6] fix formatting + remove unused import --- cpp/src/arrow/python/python_to_arrow.cc | 2 +- python/pyarrow/tests/test_convert_builtin.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index 964b3f2c8af..849c474ded3 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -107,7 +107,7 @@ struct ValueConverter> { static inline Result FromPython(PyObject* obj) { ValueType value; arrow::Status s_ = internal::CIntFromPython(obj, &value); - if(!s_.ok() && !internal::PyIntScalar_Check(obj)){ + if (!s_.ok() && !internal::PyIntScalar_Check(obj)) { return internal::InvalidValue(obj, "tried to convert to int"); } else { RETURN_NOT_OK(s_); diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index f1dc05428a2..b8050f96468 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -25,7 +25,6 @@ import decimal import itertools import math -import traceback import numpy as np import pytz From 8f2816040ce1cfac6948637943c7927ae19d6a37 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 27 Aug 2020 23:39:33 -0400 Subject: [PATCH 5/6] remove assert for dict_from_pandas_spec_type + add msg check for mixed_types --- python/pyarrow/tests/test_pandas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index f31d1c2e76a..31b09050f45 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -19,6 +19,7 @@ import decimal import json import multiprocessing as mp +import re import sys from collections import OrderedDict @@ -2460,7 +2461,8 @@ def test_category_zero_chunks(self): ) def test_mixed_types_fails(self, data, error_type): df = pd.DataFrame(data) - with pytest.raises(error_type): + msg = "Conversion failed for column a with type object" + with pytest.raises(error_type, match=msg): pa.Table.from_pandas(df) def test_strided_data_import(self): @@ -3537,7 +3539,6 @@ def test_dictionary_from_pandas_specified_type(): typ = pa.dictionary(index_type=pa.int8(), value_type=pa.int64()) with pytest.raises(pa.ArrowInvalid): result = pa.array(cat, type=typ) - assert result.to_pylist() == ["a", "b"] # mismatching order -> raise error (for now a deprecation warning) typ = pa.dictionary( From 70ae0a9d2fd81fec10ea1a1019a450d4cbb55031 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 27 Aug 2020 23:40:09 -0400 Subject: [PATCH 6/6] add msg check for test_fill_null + remove unused import --- python/pyarrow/tests/test_compute.py | 2 +- python/pyarrow/tests/test_pandas.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2fa00f4bd61..ce45fc6f1bd 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -736,7 +736,7 @@ def test_is_null(): def test_fill_null(): arr = pa.array([1, 2, None, 4], type=pa.int8()) fill_value = pa.array([5], type=pa.int8()) - with pytest.raises(pa.ArrowInvalid): + with pytest.raises(pa.ArrowInvalid, match="tried to convert to int"): arr.fill_null(fill_value) arr = pa.array([None, None, None, None], type=pa.null()) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 31b09050f45..03407521c12 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -19,7 +19,6 @@ import decimal import json import multiprocessing as mp -import re import sys from collections import OrderedDict