From 869b352e3be837d454177103257e28aa2ae3cc87 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 13 Sep 2022 09:44:18 +0200 Subject: [PATCH 01/18] First try at adding cython test example to ve run from pytest - not working yet --- python/CMakeLists.txt | 3 +- python/pyarrow/_pyarrow_cpp_tests.pxd | 32 ++++++++++++++++ python/pyarrow/_pyarrow_cpp_tests.pyx | 55 +++++++++++++++++++++++++++ python/pyarrow/tests/test_python.py | 37 ++++++++++++++++++ python/setup.py | 1 + 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 python/pyarrow/_pyarrow_cpp_tests.pxd create mode 100644 python/pyarrow/_pyarrow_cpp_tests.pyx create mode 100644 python/pyarrow/tests/test_python.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 5a61227deed..515b8c07466 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -410,7 +410,8 @@ set(CYTHON_EXTENSIONS _feather _fs _hdfsio - _json) + _json + _pyarrow_cpp_tests) set(LINK_LIBS ArrowPython::arrow_python_shared) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd new file mode 100644 index 00000000000..6c8c5afd7a6 --- /dev/null +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ +# cython: language_level = 3 + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport (c_string, CStatus) + +cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: + + PyObject* DecimalFromString( + Decimal, + const c_string decimal_string) + + CStatus PythonDecimalToString( + PyObject* python_decimal, + c_string* out); \ No newline at end of file diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx new file mode 100644 index 00000000000..2f010881601 --- /dev/null +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: profile=False +# distutils: language = c++ + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * +from pyarrow.lib cimport (check_status) + +from decimal import Decimal + +import functools +import inspect + +def cytest(func): + """ + Wraps `func` in a plain Python function. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + bound = inspect.signature(func).bind(*args, **kwargs) + return func(*bound.args, **bound.kwargs) + + return wrapped + +#cdef create_python_decimal(c_string& string_value): +# cdef PyObject* decimal_value = DecimalFromString(Decimal, string_value) +# return PyObject_to_object(decimal_value) + +@cytest +def test_PythonDecimalToString(): + cdef c_string decimal_string = b'-39402950693754869342983' + cdef PyObject* decimal_value = DecimalFromString(Decimal, decimal_string) + cdef c_string string_out + + with nogil: + check_status(PythonDecimalToString(decimal_value, &string_out)) + + assert string_out == decimal_string diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py new file mode 100644 index 00000000000..a99430bb509 --- /dev/null +++ b/python/pyarrow/tests/test_python.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import importlib +import sys + +# list of Cython modules containing tests +cython_test_modules = ["pyarrow._pyarrow_cpp_tests"] + +for mod in cython_test_modules: + # For each callable in `mod` with name `test_*`, + # set the result as an attribute of this module. + mod = importlib.import_module(mod) + for name in dir(mod): + item = getattr(mod, name) + if callable(item) and name.startswith("test_"): + setattr(sys.modules[__name__], name, item) + +# from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString) # noqa + +# def test_to_string(): +# string_out = test_PythonDecimalToString() +# assert string_out == str('-39402950693754869342983') \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index 6852a3affb8..cd855e869a8 100755 --- a/python/setup.py +++ b/python/setup.py @@ -220,6 +220,7 @@ def initialize_options(self): '_feather', '_parquet', '_parquet_encryption', + '_pyarrow_cpp_tests', '_orc', '_plasma', '_gcsfs', From a6081c8810e8d16270d5b97c7d35902e9cc86fce Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 13 Sep 2022 15:13:37 +0200 Subject: [PATCH 02/18] Make the test simpler - segfaults currently --- python/pyarrow/_pyarrow_cpp_tests.pxd | 6 ++--- python/pyarrow/_pyarrow_cpp_tests.pyx | 32 +++++++++------------------ python/pyarrow/tests/test_python.py | 31 +++++++++++++------------- 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index 6c8c5afd7a6..fbf746d5b29 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -24,9 +24,9 @@ from pyarrow.includes.libarrow cimport (c_string, CStatus) cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: PyObject* DecimalFromString( - Decimal, - const c_string decimal_string) + PyObject* decimal_constructor, + const c_string& decimal_string) CStatus PythonDecimalToString( PyObject* python_decimal, - c_string* out); \ No newline at end of file + c_string* out); diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 2f010881601..81ee2c7d3f0 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -# cython: profile=False +# cython: profile=False, binding=True # distutils: language = c++ from pyarrow.includes.common cimport * @@ -24,32 +24,20 @@ from pyarrow.lib cimport (check_status) from decimal import Decimal -import functools -import inspect - -def cytest(func): - """ - Wraps `func` in a plain Python function. - """ - - @functools.wraps(func) - def wrapped(*args, **kwargs): - bound = inspect.signature(func).bind(*args, **kwargs) - return func(*bound.args, **bound.kwargs) - - return wrapped - #cdef create_python_decimal(c_string& string_value): # cdef PyObject* decimal_value = DecimalFromString(Decimal, string_value) # return PyObject_to_object(decimal_value) -@cytest def test_PythonDecimalToString(): - cdef c_string decimal_string = b'-39402950693754869342983' - cdef PyObject* decimal_value = DecimalFromString(Decimal, decimal_string) - cdef c_string string_out + cdef: + c_string decimal_string = b'-39402950693754869342983' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + c_string string_result with nogil: - check_status(PythonDecimalToString(decimal_value, &string_out)) + check_status(PythonDecimalToString( + python_object, + &string_result) + ) - assert string_out == decimal_string + assert string_result == decimal_string diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index a99430bb509..df138b77535 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -15,23 +15,22 @@ # specific language governing permissions and limitations # under the License. -import importlib -import sys +# import importlib +# import sys -# list of Cython modules containing tests -cython_test_modules = ["pyarrow._pyarrow_cpp_tests"] +# # list of Cython modules containing tests +# cython_test_modules = ["pyarrow._pyarrow_cpp_tests"] -for mod in cython_test_modules: - # For each callable in `mod` with name `test_*`, - # set the result as an attribute of this module. - mod = importlib.import_module(mod) - for name in dir(mod): - item = getattr(mod, name) - if callable(item) and name.startswith("test_"): - setattr(sys.modules[__name__], name, item) +# for mod in cython_test_modules: +# # For each callable in `mod` with name `test_*`, +# # set the result as an attribute of this module. +# mod = importlib.import_module(mod) +# for name in dir(mod): +# item = getattr(mod, name) +# if callable(item) and name.startswith("test_"): +# setattr(sys.modules[__name__], name, item) -# from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString) # noqa +from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString) # noqa -# def test_to_string(): -# string_out = test_PythonDecimalToString() -# assert string_out == str('-39402950693754869342983') \ No newline at end of file +def test_python_decimal_to_string(): + test_PythonDecimalToString() From 28c926b035136aefa697741fa94ee2dcfacbd73e Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 14 Sep 2022 12:01:17 +0200 Subject: [PATCH 03/18] Add working test for PythonDecimalToString, InferPrecisionAndScale and InferPrecisionAndNegativeScale --- python/pyarrow/_pyarrow_cpp_tests.pxd | 10 +++++++ python/pyarrow/_pyarrow_cpp_tests.pyx | 41 +++++++++++++++++++++++---- python/pyarrow/tests/test_python.py | 11 ++++++- python/setup.cfg | 3 ++ 4 files changed, 59 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index fbf746d5b29..5588010a786 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -30,3 +30,13 @@ cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: CStatus PythonDecimalToString( PyObject* python_decimal, c_string* out); + + cdef cppclass DecimalMetadata: + DecimalMetadata() + DecimalMetadata(int32_t precision, int32_t scale) + + CStatus Update(int32_t suggested_precision, int32_t suggested_scale) + CStatus Update(PyObject* object) + + int32_t precision() + int32_t scale() diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 81ee2c7d3f0..d254a36314b 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -34,10 +34,41 @@ def test_PythonDecimalToString(): PyObject* python_object = DecimalFromString(Decimal, decimal_string) c_string string_result - with nogil: - check_status(PythonDecimalToString( - python_object, - &string_result) - ) + # Should a check be added here that python_object != nullptr ? + + check_status(PythonDecimalToString( + python_object, + &string_result) + ) assert string_result == decimal_string + +def test_InferPrecisionAndScale(): + cdef: + c_string decimal_string = b'-394029506937548693.42983' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + DecimalMetadata metadata + int32_t expected_precision = (decimal_string.size()) - 2 # 1 for -, 1 for . + int32_t expected_scale = 5 + + check_status(metadata.Update( + python_object) + ) + + assert expected_precision == metadata.precision() + assert expected_scale == metadata.scale() + +def test_InferPrecisionAndNegativeScale(): + cdef: + c_string decimal_string = b'-3.94042983E+10' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + DecimalMetadata metadata + int32_t expected_precision = 11 + int32_t expected_scale = 0 + + check_status(metadata.Update( + python_object) + ) + + assert expected_precision == metadata.precision() + assert expected_scale == metadata.scale() diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index df138b77535..ebaab4b452f 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -30,7 +30,16 @@ # if callable(item) and name.startswith("test_"): # setattr(sys.modules[__name__], name, item) -from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString) # noqa +from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString, # noqa + test_InferPrecisionAndScale, + test_InferPrecisionAndNegativeScale) + def test_python_decimal_to_string(): test_PythonDecimalToString() + +def test_infer_precision_and_scale(): + test_InferPrecisionAndScale() + +def test_infer_precision_and_negative_scale(): + test_InferPrecisionAndNegativeScale() diff --git a/python/setup.cfg b/python/setup.cfg index 062ce2745d8..cb9d0cfca58 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -28,6 +28,9 @@ build-dir = doc/_build addopts = --ignore=scripts filterwarnings = error:The SparseDataFrame:FutureWarning + # ignore warnings when Cython functions are called + # from python tests + ignore::pytest.PytestCollectionWarning # Get a debug traceback when a test takes a really long time faulthandler_timeout = 300 From 7a0352de23c7e26d55eac292c0dadae9550e4e1d Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 14 Sep 2022 12:59:29 +0200 Subject: [PATCH 04/18] Linter corrections --- python/pyarrow/_pyarrow_cpp_tests.pxd | 2 +- python/pyarrow/_pyarrow_cpp_tests.pyx | 9 +++------ python/pyarrow/tests/test_python.py | 2 ++ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index 5588010a786..788f856366b 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -29,7 +29,7 @@ cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: CStatus PythonDecimalToString( PyObject* python_decimal, - c_string* out); + c_string* out) cdef cppclass DecimalMetadata: DecimalMetadata() diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index d254a36314b..44818d5a761 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -24,9 +24,6 @@ from pyarrow.lib cimport (check_status) from decimal import Decimal -#cdef create_python_decimal(c_string& string_value): -# cdef PyObject* decimal_value = DecimalFromString(Decimal, string_value) -# return PyObject_to_object(decimal_value) def test_PythonDecimalToString(): cdef: @@ -39,7 +36,7 @@ def test_PythonDecimalToString(): check_status(PythonDecimalToString( python_object, &string_result) - ) + ) assert string_result == decimal_string @@ -53,7 +50,7 @@ def test_InferPrecisionAndScale(): check_status(metadata.Update( python_object) - ) + ) assert expected_precision == metadata.precision() assert expected_scale == metadata.scale() @@ -68,7 +65,7 @@ def test_InferPrecisionAndNegativeScale(): check_status(metadata.Update( python_object) - ) + ) assert expected_precision == metadata.precision() assert expected_scale == metadata.scale() diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index ebaab4b452f..f2994595f24 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -38,8 +38,10 @@ def test_python_decimal_to_string(): test_PythonDecimalToString() + def test_infer_precision_and_scale(): test_InferPrecisionAndScale() + def test_infer_precision_and_negative_scale(): test_InferPrecisionAndNegativeScale() From 728158f3aeab76c89ea772225c6e09c4afb88d53 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 14 Sep 2022 14:06:33 +0200 Subject: [PATCH 05/18] Add other test from group TestInfer* --- python/pyarrow/_pyarrow_cpp_tests.pyx | 50 +++++++++++++++++++++++++++ python/pyarrow/tests/test_python.py | 22 ++++++++---- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 44818d5a761..05c5a2164c9 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -40,6 +40,7 @@ def test_PythonDecimalToString(): assert string_result == decimal_string + def test_InferPrecisionAndScale(): cdef: c_string decimal_string = b'-394029506937548693.42983' @@ -55,6 +56,7 @@ def test_InferPrecisionAndScale(): assert expected_precision == metadata.precision() assert expected_scale == metadata.scale() + def test_InferPrecisionAndNegativeScale(): cdef: c_string decimal_string = b'-3.94042983E+10' @@ -69,3 +71,51 @@ def test_InferPrecisionAndNegativeScale(): assert expected_precision == metadata.precision() assert expected_scale == metadata.scale() + + +def test_TestInferAllLeadingZeros(): + cdef: + c_string decimal_string = b'0.001' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + DecimalMetadata metadata + int32_t expected_precision = 3 + int32_t expected_scale = 3 + + check_status(metadata.Update( + python_object) + ) + + assert expected_precision == metadata.precision() + assert expected_scale == metadata.scale() + + +def test_TestInferAllLeadingZerosExponentialNotationPositive(): + cdef: + c_string decimal_string = b'0.01E5' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + DecimalMetadata metadata + int32_t expected_precision = 4 + int32_t expected_scale = 0 + + check_status(metadata.Update( + python_object) + ) + + assert expected_precision == metadata.precision() + assert expected_scale == metadata.scale() + + +def test_TestInferAllLeadingZerosExponentialNotationNegative(): + cdef: + c_string decimal_string = b'0.01E3' + PyObject* python_object = DecimalFromString(Decimal, decimal_string) + DecimalMetadata metadata + int32_t expected_precision = 2 + int32_t expected_scale = 0 + + check_status(metadata.Update( + python_object) + ) + + assert expected_precision == metadata.precision() + assert expected_scale == metadata.scale() diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index f2994595f24..cb4c1b27a18 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -30,18 +30,28 @@ # if callable(item) and name.startswith("test_"): # setattr(sys.modules[__name__], name, item) -from pyarrow._pyarrow_cpp_tests import (test_PythonDecimalToString, # noqa - test_InferPrecisionAndScale, - test_InferPrecisionAndNegativeScale) +import pyarrow._pyarrow_cpp_tests as t # noqa def test_python_decimal_to_string(): - test_PythonDecimalToString() + t.test_PythonDecimalToString() def test_infer_precision_and_scale(): - test_InferPrecisionAndScale() + t.test_InferPrecisionAndScale() def test_infer_precision_and_negative_scale(): - test_InferPrecisionAndNegativeScale() + t.test_InferPrecisionAndNegativeScale() + + +def test_infer_all_leading_zeros(): + t.test_TestInferAllLeadingZeros() + + +def test_infer_all_leading_zeros_e_pos(): + t.test_TestInferAllLeadingZerosExponentialNotationPositive() + + +def test_infer_all_leading_zeros_e_neg(): + t.test_TestInferAllLeadingZerosExponentialNotationNegative() From d192a4538c1b43fa02d306b2321284c60d3598ee Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 20 Sep 2022 14:01:32 +0200 Subject: [PATCH 06/18] Remove GTest dependency from PyArrow, change tests staying in C++ to return a Status and run them from Pytest --- python/pyarrow/_pyarrow_cpp_tests.pxd | 14 + python/pyarrow/_pyarrow_cpp_tests.pyx | 37 +- python/pyarrow/src/CMakeLists.txt | 175 +-------- python/pyarrow/src/python_test.cc | 509 +++++++------------------- python/pyarrow/src/python_test.h | 53 +++ python/pyarrow/tests/test_python.py | 43 ++- 6 files changed, 270 insertions(+), 561 deletions(-) create mode 100644 python/pyarrow/src/python_test.h diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index 788f856366b..bc21f4a722a 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -40,3 +40,17 @@ cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: int32_t precision() int32_t scale() + +cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: + + CStatus TestOwnedRefMoves() + CStatus TestOwnedRefNoGILMoves() + CStatus TestCheckPyErrorStatus() + CStatus TestCheckPyErrorStatusNoGIL() + CStatus TestRestorePyErrorBasics() + CStatus TestPyBufferInvalidInputObject() + + #ifdef _WIN32 + CStatus TestPyBufferNumpyArray() + CStatus TestNumPyBufferNumpyArray() + #endif \ No newline at end of file diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 05c5a2164c9..b42bc415d53 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -25,6 +25,40 @@ from pyarrow.lib cimport (check_status) from decimal import Decimal +def test_TestOwnedRefMoves(): + check_status(TestOwnedRefMoves()) + + +def test_TestOwnedRefNoGILMoves(): + check_status(TestOwnedRefNoGILMoves()) + + +def test_TestCheckPyErrorStatus(): + check_status(TestCheckPyErrorStatus()) + + +def test_TestCheckPyErrorStatusNoGIL(): + check_status(TestCheckPyErrorStatusNoGIL()) + + +def test_TestRestorePyErrorBasics(): + check_status(TestRestorePyErrorBasics()) + + +def test_TestPyBufferInvalidInputObject(): + check_status(TestPyBufferInvalidInputObject()) + + +#ifdef _WIN32 +def test_TestPyBufferNumpyArray(): + check_status(TestPyBufferNumpyArray()) + + +def test_TestNumPyBufferNumpyArray(): + check_status(TestNumPyBufferNumpyArray()) +#endif + + def test_PythonDecimalToString(): cdef: c_string decimal_string = b'-39402950693754869342983' @@ -46,7 +80,8 @@ def test_InferPrecisionAndScale(): c_string decimal_string = b'-394029506937548693.42983' PyObject* python_object = DecimalFromString(Decimal, decimal_string) DecimalMetadata metadata - int32_t expected_precision = (decimal_string.size()) - 2 # 1 for -, 1 for . + # 1 for -, 1 for . + int32_t expected_precision = (decimal_string.size()) - 2 int32_t expected_scale = 5 check_status(metadata.Update( diff --git a/python/pyarrow/src/CMakeLists.txt b/python/pyarrow/src/CMakeLists.txt index 03c06d32b7a..f60b3e67ece 100644 --- a/python/pyarrow/src/CMakeLists.txt +++ b/python/pyarrow/src/CMakeLists.txt @@ -75,10 +75,7 @@ set(Python3_FIND_FRAMEWORK "LAST") find_package(Python3Alt 3.7 REQUIRED) include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${ARROW_INCLUDE_DIR} src) -add_custom_target(arrow_python-all) add_custom_target(arrow_python) -add_custom_target(arrow_python-tests) -add_dependencies(arrow_python-all arrow_python arrow_python-tests) set(ARROW_PYTHON_SRCS arrow_to_pandas.cc @@ -96,6 +93,7 @@ set(ARROW_PYTHON_SRCS ipc.cc numpy_convert.cc numpy_to_arrow.cc + python_test.cc python_to_arrow.cc pyarrow.cc serialize.cc @@ -312,174 +310,3 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL endif() arrow_install_all_headers("arrow/python") - -# ---------------------------------------------------------------------- - -# -# Tests -# The tests will be moved to Cython and are currently supported for bundled GTest -# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016 -# - -if(ARROW_BUILD_TESTS) - - enable_testing() - set(GTEST_ROOT ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) - - # GTest must be built from source - if(EXISTS ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) - - # Set necessary paths for cmake to find GTest - set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/include") - set(GTEST_LIBRARY ${GTEST_ROOT}/lib) - set(GTEST_MAIN_LIBRARY ${GTEST_ROOT}/lib) - - # - # Taken from Matlab CMakeLists.txt (enable_gtest and build_gtest) - # - - set(ARROW_GTEST_PREFIX "${GTEST_ROOT}") - set(ARROW_GTEST_MAIN_PREFIX "${GTEST_ROOT}") - - if(WIN32) - set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/bin") - set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin") - - set(ARROW_GTEST_LINK_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") - set(ARROW_GTEST_LINK_LIB - "${ARROW_GTEST_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtestd${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - - set(ARROW_GTEST_MAIN_LINK_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") - set(ARROW_GTEST_MAIN_LINK_LIB - "${ARROW_GTEST_MAIN_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtest_maind${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - else() - set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") - set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") - endif() - - set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include") - set(ARROW_GTEST_SHARED_LIB - "${ARROW_GTEST_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtestd${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - - set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include") - set(ARROW_GTEST_MAIN_SHARED_LIB - "${ARROW_GTEST_MAIN_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_maind${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - - file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}") - - # Create target GTest::gtest - add_library(GTest::gtest SHARED IMPORTED) - set_target_properties(GTest::gtest - PROPERTIES IMPORTED_LOCATION ${ARROW_GTEST_SHARED_LIB} - INTERFACE_INCLUDE_DIRECTORIES - ${ARROW_GTEST_INCLUDE_DIR}) - if(WIN32) - set_target_properties(GTest::gtest PROPERTIES IMPORTED_IMPLIB ${ARROW_GTEST_LINK_LIB}) - endif() - - # ArrowTesting - # needed to be able to use arrow_testing_shared target - find_package(ArrowTesting REQUIRED) - - add_custom_target(all-tests) - - add_library(arrow_python_test_main STATIC util/test_main.cc) - - target_link_libraries(arrow_python_test_main GTest::gtest) - target_include_directories(arrow_python_test_main SYSTEM - PUBLIC ${ARROW_PYTHON_INCLUDES}) - - # Link libraries to avoid include error on Linux - if(ARROW_TEST_LINKAGE STREQUAL shared) - target_link_libraries(arrow_python_test_main Arrow::arrow_shared) - else() - target_link_libraries(arrow_python_test_main Arrow::arrow_static) - endif() - - if(APPLE) - target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS}) - set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS - "-undefined dynamic_lookup") - elseif(NOT MSVC) - target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS}) - endif() - - if(ARROW_TEST_LINKAGE STREQUAL shared) - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_shared - ArrowTesting::arrow_testing_shared) - else() - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_static - ArrowTesting::arrow_testing_static) - endif() - - # - # Add a test case - # - - set(REL_TEST_NAME "python_test") - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - set(TEST_NAME "arrow-${TEST_NAME}") - set(SOURCES "${REL_TEST_NAME}.cc") - - # # Make sure the executable name contains only hyphens, not underscores - string(REPLACE "_" "-" TEST_NAME ${TEST_NAME}) - - set(TEST_PATH "${CMAKE_BINARY_DIR}/${TEST_NAME}") - add_executable(${TEST_NAME} ${SOURCES}) - - # We need to set the correct RPATH so that dependencies - set_target_properties(${TEST_NAME} - PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE - INSTALL_RPATH_USE_LINK_PATH TRUE - INSTALL_RPATH - "${PYTHON_SOURCE_DIR}/pyarrow;$ENV{CONDA_PREFIX}/lib") - - # Customize link libraries - target_link_libraries(${TEST_NAME} PRIVATE "${ARROW_PYTHON_TEST_LINK_LIBS}") - # Extra link libs - target_link_libraries(${TEST_NAME} PRIVATE ${PYTHON_LIBRARIES}) - # Extra includes - target_include_directories(${TEST_NAME} SYSTEM PUBLIC "${ARROW_PYTHON_INCLUDES}") - - # Add the test - if(WIN32) - add_test(${TEST_NAME} ${TEST_PATH}) - else() - add_test(${TEST_NAME} - ${ARROW_CPP_SOURCE_DIR}/build-support/run-test.sh - ${CMAKE_BINARY_DIR} - test - ${TEST_PATH}) - endif() - - # Add test as dependency of relevant targets - add_dependencies(all-tests ${TEST_NAME}) - add_dependencies(arrow_python-tests ${TEST_NAME}) - - set(LABELS) - list(APPEND LABELS "unittest" arrow_python-tests) - - # ensure there is a cmake target which exercises tests with this LABEL - set(LABEL_TEST_NAME "test-arrow_python-tests") - if(NOT TARGET ${LABEL_TEST_NAME}) - add_custom_target(${LABEL_TEST_NAME} - ctest -L "${LABEL}" --output-on-failure - USES_TERMINAL) - endif() - # ensure the test is (re)built before the LABEL test runs - add_dependencies(${LABEL_TEST_NAME} ${TEST_NAME}) - - set_property(TEST ${TEST_NAME} - APPEND - PROPERTY LABELS ${LABELS}) - - else() - message(STATUS "Tests for PyArrow C++ not build") - message(STATUS "Set -DGTest_SOURCE=BUNDLED when building Arrow C++ - to enable building tests for PyArrow C++") - endif() -endif() diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index 865d4cf5c0f..bbda4d81846 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include "gtest/gtest.h" - #include #include #include @@ -27,7 +25,6 @@ #include "arrow/array.h" #include "arrow/array/builder_binary.h" #include "arrow/table.h" -#include "arrow/testing/gtest_util.h" #include "arrow/util/decimal.h" #include "arrow_to_pandas.h" @@ -36,16 +33,56 @@ #include "numpy_convert.h" #include "numpy_interop.h" #include "python_to_arrow.h" -#include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" -namespace arrow { +#define ASSERT_EQ(x, y) { \ + auto&& _left = (x); \ + auto&& _right = (y); \ + if (_left != _right) { \ + return Status::Invalid("Expected equality but ", _left, " != ", _right); \ + } \ +} +#define ASSERT_NE(x, y){ \ + auto&& _left = (x); \ + auto&& _right = (y); \ + if (_left == _right) { \ + return Status::Invalid("Expected inequality but ", _left, " == ", _right); \ + } \ +} +#define ASSERT_FALSE_PY(error){ \ + auto&& _err = (error); \ + if (_err != NULL){ \ + return Status::Invalid("An error occurred: ", _err); \ + } \ +} +#define ASSERT_TRUE_PY(error){ \ + auto&& _err = (error); \ + if (_err == NULL){ \ + return Status::Invalid("Expected an error but did not got one."); \ + } \ +} +#define ASSERT_FALSE(error){ \ + auto&& _err = (error); \ + if (_err != false){ \ + return Status::Invalid("An error occurred: ", _err); \ + } \ +} +#define ASSERT_TRUE(error){ \ + auto&& _err = (error); \ + if (_err == false){ \ + return Status::Invalid("Expected an error but did not got one."); \ + } \ +} +#define ASSERT_OK(expr){ \ + for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \ + return Status::Invalid(expr, "failed with", _st.ToString()); \ +} -using internal::checked_cast; +namespace arrow { namespace py { -TEST(OwnedRef, TestMoves) { +Status TestOwnedRefMoves() { std::vector vec; PyObject *u, *v; u = PyList_New(0); @@ -59,9 +96,10 @@ TEST(OwnedRef, TestMoves) { vec.emplace_back(v); ASSERT_EQ(Py_REFCNT(u), 1); ASSERT_EQ(Py_REFCNT(v), 1); + return Status::OK(); } -TEST(OwnedRefNoGIL, TestMoves) { +Status TestOwnedRefNoGILMoves() { PyAcquireGIL lock; lock.release(); @@ -82,6 +120,7 @@ TEST(OwnedRefNoGIL, TestMoves) { vec.emplace_back(v); ASSERT_EQ(Py_REFCNT(u), 1); ASSERT_EQ(Py_REFCNT(v), 1); + return Status::OK(); } } @@ -92,47 +131,78 @@ std::string FormatPythonException(const std::string& exc_class_name) { return ss.str(); } -TEST(CheckPyError, TestStatus) { +Status TestCheckPyErrorStatus() { Status st; - - auto check_error = [](Status& st, const char* expected_message = "some error", - std::string expected_detail = "") { - st = CheckPyError(); - ASSERT_EQ(st.message(), expected_message); - ASSERT_FALSE(PyErr_Occurred()); - if (expected_detail.size() > 0) { - auto detail = st.detail(); - ASSERT_NE(detail, nullptr); - ASSERT_EQ(detail->ToString(), expected_detail); - } - }; + std::string expected_detail = ""; + + // auto check_error = [](Status& st, const char* expected_message = "some error", + // std::string expected_detail = "") { + // st = CheckPyError(); + // ASSERT_EQ(st.message(), expected_message); + // ASSERT_FALSE_PY(PyErr_Occurred()); + // if (expected_detail.size() > 0) { + // auto detail = st.detail(); + // ASSERT_NE(detail, nullptr); + // ASSERT_EQ(detail->ToString(), expected_detail); + // } + // }; for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) { PyErr_SetString(exc_type, "some error"); - check_error(st); + //check_error(st); + st = CheckPyError(); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE_PY(PyErr_Occurred()); ASSERT_TRUE(st.IsUnknownError()); } PyErr_SetString(PyExc_TypeError, "some error"); - check_error(st, "some error", FormatPythonException("TypeError")); + // check_error(st, "some error", FormatPythonException("TypeError")); + st = CheckPyError(); + expected_detail = FormatPythonException("TypeError"); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE_PY(PyErr_Occurred()); + if (expected_detail.size() > 0) { + auto detail = st.detail(); + ASSERT_NE(detail, nullptr); + ASSERT_EQ(detail->ToString(), expected_detail); + } ASSERT_TRUE(st.IsTypeError()); PyErr_SetString(PyExc_ValueError, "some error"); - check_error(st); + // check_error(st); + st = CheckPyError(); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE_PY(PyErr_Occurred()); ASSERT_TRUE(st.IsInvalid()); PyErr_SetString(PyExc_KeyError, "some error"); - check_error(st, "'some error'"); + // check_error(st, "'some error'"); + st = CheckPyError(); + ASSERT_EQ(st.message(), "'some error'"); + ASSERT_FALSE_PY(PyErr_Occurred()); ASSERT_TRUE(st.IsKeyError()); for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) { PyErr_SetString(exc_type, "some error"); - check_error(st); + // check_error(st); + st = CheckPyError(); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE_PY(PyErr_Occurred()); ASSERT_TRUE(st.IsIOError()); } PyErr_SetString(PyExc_NotImplementedError, "some error"); - check_error(st, "some error", FormatPythonException("NotImplementedError")); + // check_error(st, "some error", FormatPythonException("NotImplementedError")); + st = CheckPyError(); + expected_detail = FormatPythonException("NotImplementedError"); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE_PY(PyErr_Occurred()); + if (expected_detail.size() > 0) { + auto detail = st.detail(); + ASSERT_NE(detail, nullptr); + ASSERT_EQ(detail->ToString(), expected_detail); + } ASSERT_TRUE(st.IsNotImplemented()); // No override if a specific status code is given @@ -140,33 +210,36 @@ TEST(CheckPyError, TestStatus) { st = CheckPyError(StatusCode::SerializationError); ASSERT_TRUE(st.IsSerializationError()); ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE(PyErr_Occurred()); + ASSERT_FALSE_PY(PyErr_Occurred()); + + return Status::OK(); } -TEST(CheckPyError, TestStatusNoGIL) { +Status TestCheckPyErrorStatusNoGIL() { PyAcquireGIL lock; { Status st; PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); st = ConvertPyError(); - ASSERT_FALSE(PyErr_Occurred()); + ASSERT_FALSE_PY(PyErr_Occurred()); lock.release(); ASSERT_TRUE(st.IsUnknownError()); ASSERT_EQ(st.message(), "zzzt"); ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError")); + return Status::OK(); } } -TEST(RestorePyError, Basics) { +Status TestRestorePyErrorBasics() { PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); auto st = ConvertPyError(); - ASSERT_FALSE(PyErr_Occurred()); + ASSERT_FALSE_PY(PyErr_Occurred()); ASSERT_TRUE(st.IsUnknownError()); ASSERT_EQ(st.message(), "zzzt"); ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError")); RestorePyError(st); - ASSERT_TRUE(PyErr_Occurred()); + ASSERT_TRUE_PY(PyErr_Occurred()); PyObject* exc_type; PyObject* exc_value; PyObject* exc_traceback; @@ -175,25 +248,28 @@ TEST(RestorePyError, Basics) { std::string py_message; ASSERT_OK(internal::PyObject_StdStringStr(exc_value, &py_message)); ASSERT_EQ(py_message, "zzzt"); + + return Status::OK(); } -TEST(PyBuffer, InvalidInputObject) { +Status TestPyBufferInvalidInputObject() { std::shared_ptr res; PyObject* input = Py_None; auto old_refcnt = Py_REFCNT(input); { Status st = PyBuffer::FromPyObject(input).status(); - ASSERT_TRUE(IsPyError(st)) << st.ToString(); - ASSERT_FALSE(PyErr_Occurred()); + // ASSERT_TRUE(IsPyError(st)) << st.ToString(); + ASSERT_FALSE_PY(PyErr_Occurred()); } ASSERT_EQ(old_refcnt, Py_REFCNT(input)); + return Status::OK(); } // Because of how it is declared, the Numpy C API instance initialized // within libarrow_python.dll may not be visible in this test under Windows // ("unresolved external symbol arrow_ARRAY_API referenced"). #ifndef _WIN32 -TEST(PyBuffer, NumpyArray) { +Status TestPyBufferNumpyArray() { const npy_intp dims[1] = {10}; OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT)); @@ -201,7 +277,10 @@ TEST(PyBuffer, NumpyArray) { ASSERT_NE(arr, nullptr); auto old_refcnt = Py_REFCNT(arr); - ASSERT_OK_AND_ASSIGN(auto buf, PyBuffer::FromPyObject(arr)); + // ASSERT_OK_AND_ASSIGN(auto buf, PyBuffer::FromPyObject(arr)); + // ASSERT_OK(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__).status()) + auto buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); + ASSERT_TRUE(buf->is_cpu()); ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); ASSERT_TRUE(buf->is_mutable()); @@ -212,16 +291,19 @@ TEST(PyBuffer, NumpyArray) { // Read-only PyArray_CLEARFLAGS(reinterpret_cast(arr), NPY_ARRAY_WRITEABLE); - ASSERT_OK_AND_ASSIGN(buf, PyBuffer::FromPyObject(arr)); + // ASSERT_OK_AND_ASSIGN(buf, PyBuffer::FromPyObject(arr)); + buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); ASSERT_TRUE(buf->is_cpu()); ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); ASSERT_FALSE(buf->is_mutable()); ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); buf.reset(); ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); + + return Status::OK(); } -TEST(NumPyBuffer, NumpyArray) { +Status TestNumPyBufferNumpyArray() { npy_intp dims[1] = {10}; OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT)); @@ -247,353 +329,10 @@ TEST(NumPyBuffer, NumpyArray) { ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); buf.reset(); ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); -} -#endif - -class DecimalTest : public ::testing::Test { - public: - DecimalTest() : lock_(), decimal_constructor_() { - OwnedRef decimal_module; - - Status status = internal::ImportModule("decimal", &decimal_module); - ARROW_CHECK_OK(status); - - status = internal::ImportFromModule(decimal_module.obj(), "Decimal", - &decimal_constructor_); - ARROW_CHECK_OK(status); - } - OwnedRef CreatePythonDecimal(const std::string& string_value) { - OwnedRef ref(internal::DecimalFromString(decimal_constructor_.obj(), string_value)); - return ref; - } - - PyObject* decimal_constructor() const { return decimal_constructor_.obj(); } - - private: - PyAcquireGIL lock_; - OwnedRef decimal_constructor_; -}; - -TEST_F(DecimalTest, TestPythonDecimalToString) { - std::string decimal_string("-39402950693754869342983"); - - OwnedRef python_object(this->CreatePythonDecimal(decimal_string)); - ASSERT_NE(python_object.obj(), nullptr); - - std::string string_result; - ASSERT_OK(internal::PythonDecimalToString(python_object.obj(), &string_result)); -} - -TEST_F(DecimalTest, TestInferPrecisionAndScale) { - std::string decimal_string("-394029506937548693.42983"); - OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - - const auto expected_precision = - static_cast(decimal_string.size() - 2); // 1 for -, 1 for . - const int32_t expected_scale = 5; - - ASSERT_EQ(expected_precision, metadata.precision()); - ASSERT_EQ(expected_scale, metadata.scale()); -} - -TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) { - std::string decimal_string("-3.94042983E+10"); - OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - - const auto expected_precision = 11; - const int32_t expected_scale = 0; - - ASSERT_EQ(expected_precision, metadata.precision()); - ASSERT_EQ(expected_scale, metadata.scale()); -} - -TEST_F(DecimalTest, TestInferAllLeadingZeros) { - std::string decimal_string("0.001"); - OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - ASSERT_EQ(3, metadata.precision()); - ASSERT_EQ(3, metadata.scale()); -} - -TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationPositive) { - std::string decimal_string("0.01E5"); - OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - ASSERT_EQ(4, metadata.precision()); - ASSERT_EQ(0, metadata.scale()); -} - -TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationNegative) { - std::string decimal_string("0.01E3"); - OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - ASSERT_EQ(2, metadata.precision()); - ASSERT_EQ(0, metadata.scale()); -} - -TEST(PandasConversionTest, TestObjectBlockWriteFails) { - StringBuilder builder; - const char value[] = {'\xf1', '\0'}; - - for (int i = 0; i < 1000; ++i) { - ASSERT_OK(builder.Append(value, static_cast(strlen(value)))); - } - - std::shared_ptr arr; - ASSERT_OK(builder.Finish(&arr)); - - auto f1 = field("f1", utf8()); - auto f2 = field("f2", utf8()); - auto f3 = field("f3", utf8()); - std::vector> fields = {f1, f2, f3}; - std::vector> cols = {arr, arr, arr}; - - auto schema = ::arrow::schema(fields); - auto table = Table::Make(schema, cols); - - Status st; - Py_BEGIN_ALLOW_THREADS; - PyObject* out; - PandasOptions options; - options.use_threads = true; - st = ConvertTableToPandas(options, table, &out); - Py_END_ALLOW_THREADS; - ASSERT_RAISES(UnknownError, st); -} - -TEST(BuiltinConversionTest, TestMixedTypeFails) { - OwnedRef list_ref(PyList_New(3)); - PyObject* list = list_ref.obj(); - - ASSERT_NE(list, nullptr); - - PyObject* str = PyUnicode_FromString("abc"); - ASSERT_NE(str, nullptr); - - PyObject* integer = PyLong_FromLong(1234L); - ASSERT_NE(integer, nullptr); - - PyObject* doub = PyFloat_FromDouble(123.0234); - ASSERT_NE(doub, nullptr); - - // This steals a reference to each object, so we don't need to decref them later - // just the list - ASSERT_EQ(PyList_SetItem(list, 0, str), 0); - ASSERT_EQ(PyList_SetItem(list, 1, integer), 0); - ASSERT_EQ(PyList_SetItem(list, 2, doub), 0); - - ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, {})); -} - -template -void DecimalTestFromPythonDecimalRescale(std::shared_ptr type, - OwnedRef python_decimal, - std::optional expected) { - DecimalValue value; - const auto& decimal_type = checked_cast(*type); - - if (expected.has_value()) { - ASSERT_OK( - internal::DecimalFromPythonDecimal(python_decimal.obj(), decimal_type, &value)); - ASSERT_EQ(expected.value(), value); - - ASSERT_OK(internal::DecimalFromPyObject(python_decimal.obj(), decimal_type, &value)); - ASSERT_EQ(expected.value(), value); - } else { - ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal.obj(), - decimal_type, &value)); - ASSERT_RAISES(Invalid, internal::DecimalFromPyObject(python_decimal.obj(), - decimal_type, &value)); - } -} - -TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) { - // We fail when truncating values that would lose data if cast to a decimal type with - // lower scale - DecimalTestFromPythonDecimalRescale(::arrow::decimal128(10, 2), - this->CreatePythonDecimal("1.001"), {}); - DecimalTestFromPythonDecimalRescale(::arrow::decimal256(10, 2), - this->CreatePythonDecimal("1.001"), {}); -} - -TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) { - // We allow truncation of values that do not lose precision when dividing by 10 * the - // difference between the scales, e.g., 1.000 -> 1.00 - DecimalTestFromPythonDecimalRescale( - ::arrow::decimal128(10, 2), this->CreatePythonDecimal("1.000"), 100); - DecimalTestFromPythonDecimalRescale( - ::arrow::decimal256(10, 2), this->CreatePythonDecimal("1.000"), 100); -} - -TEST_F(DecimalTest, FromPythonNegativeDecimalRescale) { - DecimalTestFromPythonDecimalRescale( - ::arrow::decimal128(10, 9), this->CreatePythonDecimal("-1.000"), -1000000000); - DecimalTestFromPythonDecimalRescale( - ::arrow::decimal256(10, 9), this->CreatePythonDecimal("-1.000"), -1000000000); -} - -TEST_F(DecimalTest, Decimal128FromPythonInteger) { - Decimal128 value; - OwnedRef python_long(PyLong_FromLong(42)); - auto type = ::arrow::decimal128(10, 2); - const auto& decimal_type = checked_cast(*type); - ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); - ASSERT_EQ(4200, value); -} - -TEST_F(DecimalTest, Decimal256FromPythonInteger) { - Decimal256 value; - OwnedRef python_long(PyLong_FromLong(42)); - auto type = ::arrow::decimal256(10, 2); - const auto& decimal_type = checked_cast(*type); - ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); - ASSERT_EQ(4200, value); -} - -TEST_F(DecimalTest, TestDecimal128OverflowFails) { - Decimal128 value; - OwnedRef python_decimal( - this->CreatePythonDecimal("9999999999999999999999999999999999999.9")); - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - ASSERT_EQ(38, metadata.precision()); - ASSERT_EQ(1, metadata.scale()); - - auto type = ::arrow::decimal(38, 38); - const auto& decimal_type = checked_cast(*type); - ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal.obj(), - decimal_type, &value)); -} - -TEST_F(DecimalTest, TestDecimal256OverflowFails) { - Decimal256 value; - OwnedRef python_decimal(this->CreatePythonDecimal( - "999999999999999999999999999999999999999999999999999999999999999999999999999.9")); - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(python_decimal.obj())); - ASSERT_EQ(76, metadata.precision()); - ASSERT_EQ(1, metadata.scale()); - - auto type = ::arrow::decimal(76, 76); - const auto& decimal_type = checked_cast(*type); - ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal.obj(), - decimal_type, &value)); -} - -TEST_F(DecimalTest, TestNoneAndNaN) { - OwnedRef list_ref(PyList_New(4)); - PyObject* list = list_ref.obj(); - - ASSERT_NE(list, nullptr); - - PyObject* constructor = this->decimal_constructor(); - PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234"); - ASSERT_NE(decimal_value, nullptr); - - Py_INCREF(Py_None); - PyObject* missing_value1 = Py_None; - ASSERT_NE(missing_value1, nullptr); - - PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN); - ASSERT_NE(missing_value2, nullptr); - - PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan"); - ASSERT_NE(missing_value3, nullptr); - - // This steals a reference to each object, so we don't need to decref them later, - // just the list - ASSERT_EQ(0, PyList_SetItem(list, 0, decimal_value)); - ASSERT_EQ(0, PyList_SetItem(list, 1, missing_value1)); - ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2)); - ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3)); - - PyConversionOptions options; - ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, options)); - - options.from_pandas = true; - ASSERT_OK_AND_ASSIGN(auto chunked, ConvertPySequence(list, nullptr, options)); - ASSERT_EQ(chunked->num_chunks(), 1); - - auto arr = chunked->chunk(0); - ASSERT_TRUE(arr->IsValid(0)); - ASSERT_TRUE(arr->IsNull(1)); - ASSERT_TRUE(arr->IsNull(2)); - ASSERT_TRUE(arr->IsNull(3)); -} - -TEST_F(DecimalTest, TestMixedPrecisionAndScale) { - std::vector strings{{"0.001", "1.01E5", "1.01E5"}}; - - OwnedRef list_ref(PyList_New(static_cast(strings.size()))); - PyObject* list = list_ref.obj(); - - ASSERT_NE(list, nullptr); - - // PyList_SetItem steals a reference to the item so we don't decref it later - PyObject* decimal_constructor = this->decimal_constructor(); - for (Py_ssize_t i = 0; i < static_cast(strings.size()); ++i) { - const int result = PyList_SetItem( - list, i, internal::DecimalFromString(decimal_constructor, strings.at(i))); - ASSERT_EQ(0, result); - } - - ASSERT_OK_AND_ASSIGN(auto arr, ConvertPySequence(list, nullptr, {})) - const auto& type = checked_cast(*arr->type()); - - int32_t expected_precision = 9; - int32_t expected_scale = 3; - ASSERT_EQ(expected_precision, type.precision()); - ASSERT_EQ(expected_scale, type.scale()); -} - -TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) { - PyObject* value1 = this->CreatePythonDecimal("0.01").detach(); - ASSERT_NE(value1, nullptr); - - PyObject* value2 = this->CreatePythonDecimal("0.001").detach(); - ASSERT_NE(value2, nullptr); - - OwnedRef list_ref(PyList_New(2)); - PyObject* list = list_ref.obj(); - - // This steals a reference to each object, so we don't need to decref them later - // just the list - ASSERT_EQ(PyList_SetItem(list, 0, value1), 0); - ASSERT_EQ(PyList_SetItem(list, 1, value2), 0); - - ASSERT_OK_AND_ASSIGN(auto arr, ConvertPySequence(list, nullptr, {})); - const auto& type = checked_cast(*arr->type()); - ASSERT_EQ(3, type.precision()); - ASSERT_EQ(3, type.scale()); -} - -TEST_F(DecimalTest, SimpleInference) { - OwnedRef value(this->CreatePythonDecimal("0.01")); - ASSERT_NE(value.obj(), nullptr); - internal::DecimalMetadata metadata; - ASSERT_OK(metadata.Update(value.obj())); - ASSERT_EQ(2, metadata.precision()); - ASSERT_EQ(2, metadata.scale()); -} - -TEST_F(DecimalTest, UpdateWithNaN) { - internal::DecimalMetadata metadata; - OwnedRef nan_value(this->CreatePythonDecimal("nan")); - ASSERT_OK(metadata.Update(nan_value.obj())); - ASSERT_EQ(std::numeric_limits::min(), metadata.precision()); - ASSERT_EQ(std::numeric_limits::min(), metadata.scale()); + return Status::OK(); } +#endif } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/python_test.h b/python/pyarrow/src/python_test.h new file mode 100644 index 00000000000..b48cf79a430 --- /dev/null +++ b/python/pyarrow/src/python_test.h @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "visibility.h" +#include "arrow/type.h" + +namespace arrow { +namespace py { + +ARROW_PYTHON_EXPORT +Status TestOwnedRefMoves(); + +ARROW_PYTHON_EXPORT +Status TestOwnedRefNoGILMoves(); + +ARROW_PYTHON_EXPORT +Status TestCheckPyErrorStatus(); + +ARROW_PYTHON_EXPORT +Status TestCheckPyErrorStatusNoGIL(); + +ARROW_PYTHON_EXPORT +Status TestRestorePyErrorBasics(); + +ARROW_PYTHON_EXPORT +Status TestPyBufferInvalidInputObject(); + +#ifndef _WIN32 +ARROW_PYTHON_EXPORT +Status TestPyBufferNumpyArray(); + +ARROW_PYTHON_EXPORT +Status TestNumPyBufferNumpyArray(); +#endif + +} // namespace py +} // namespace arrow \ No newline at end of file diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index cb4c1b27a18..16f8244a48f 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -30,7 +30,48 @@ # if callable(item) and name.startswith("test_"): # setattr(sys.modules[__name__], name, item) -import pyarrow._pyarrow_cpp_tests as t # noqa +import pyarrow._pyarrow_cpp_tests as t # noqa + +import pytest +import sys + + +def test_owned_ref_moves(): + t.test_TestOwnedRefMoves() + + +def test_owned_ref_no_gil_moves(): + t.test_TestOwnedRefNoGILMoves() + + +def test_check_py_error_status(): + t.test_TestCheckPyErrorStatus() + + +def test_check_py_error_status_no_gil(): + t.test_TestCheckPyErrorStatusNoGIL() + + +def test_restore_py_error_basics(): + t.test_TestRestorePyErrorBasics() + + +def test_py_buffer_invalid_input_object(): + t.test_TestPyBufferInvalidInputObject() + + +@pytest.mark.skipif(sys.platform == 'win32', + reason="C++ test are skipped on Windows due to " + "the Numpy C API instance not being visible") +def test_py_buffer_numpy_array(): + t.test_TestPyBufferNumpyArray() + + +@pytest.mark.skipif(sys.platform == 'win32', + reason="C++ test are skipped on Windows due to " + "the Numpy C API instance not being visible") +def test_numpy_buffer_numpy_array(): + t.test_TestNumPyBufferNumpyArray() def test_python_decimal_to_string(): From 03e8f3b77ac2e9a4ace828afbc3cc2f7bf4cc7c0 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 21 Sep 2022 09:38:21 +0200 Subject: [PATCH 07/18] Linter corrections --- python/pyarrow/_pyarrow_cpp_tests.pxd | 4 ++-- python/pyarrow/_pyarrow_cpp_tests.pyx | 4 ++-- python/pyarrow/src/python_test.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index bc21f4a722a..1defd350c1e 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -50,7 +50,7 @@ cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: CStatus TestRestorePyErrorBasics() CStatus TestPyBufferInvalidInputObject() - #ifdef _WIN32 + # ifdef _WIN32 CStatus TestPyBufferNumpyArray() CStatus TestNumPyBufferNumpyArray() - #endif \ No newline at end of file + # endif diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index b42bc415d53..7e01525af3a 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -49,14 +49,14 @@ def test_TestPyBufferInvalidInputObject(): check_status(TestPyBufferInvalidInputObject()) -#ifdef _WIN32 +# ifdef _WIN32 def test_TestPyBufferNumpyArray(): check_status(TestPyBufferNumpyArray()) def test_TestNumPyBufferNumpyArray(): check_status(TestNumPyBufferNumpyArray()) -#endif +# endif def test_PythonDecimalToString(): diff --git a/python/pyarrow/src/python_test.h b/python/pyarrow/src/python_test.h index b48cf79a430..d472f51c50d 100644 --- a/python/pyarrow/src/python_test.h +++ b/python/pyarrow/src/python_test.h @@ -50,4 +50,4 @@ Status TestNumPyBufferNumpyArray(); #endif } // namespace py -} // namespace arrow \ No newline at end of file +} // namespace arrow From 1d187e97f12dce2bd5da61d8878e61c8c90a9aaa Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 21 Sep 2022 15:56:27 +0200 Subject: [PATCH 08/18] Make the check_error inline lambda function working --- python/pyarrow/src/python_test.cc | 66 +++++++++---------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index bbda4d81846..4a1482f8436 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -135,74 +135,45 @@ Status TestCheckPyErrorStatus() { Status st; std::string expected_detail = ""; - // auto check_error = [](Status& st, const char* expected_message = "some error", - // std::string expected_detail = "") { - // st = CheckPyError(); - // ASSERT_EQ(st.message(), expected_message); - // ASSERT_FALSE_PY(PyErr_Occurred()); - // if (expected_detail.size() > 0) { - // auto detail = st.detail(); - // ASSERT_NE(detail, nullptr); - // ASSERT_EQ(detail->ToString(), expected_detail); - // } - // }; + auto check_error = [](Status& st, const char* expected_message = "some error", + std::string expected_detail = "") { + st = CheckPyError(); + ASSERT_EQ(st.message(), expected_message); + ASSERT_FALSE_PY(PyErr_Occurred()); + if (expected_detail.size() > 0) { + auto detail = st.detail(); + ASSERT_NE(detail, nullptr); + ASSERT_EQ(detail->ToString(), expected_detail); + } + return Status::OK(); + }; for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) { PyErr_SetString(exc_type, "some error"); - //check_error(st); - st = CheckPyError(); - ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_OK(check_error(st)); ASSERT_TRUE(st.IsUnknownError()); } PyErr_SetString(PyExc_TypeError, "some error"); - // check_error(st, "some error", FormatPythonException("TypeError")); - st = CheckPyError(); - expected_detail = FormatPythonException("TypeError"); - ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); - if (expected_detail.size() > 0) { - auto detail = st.detail(); - ASSERT_NE(detail, nullptr); - ASSERT_EQ(detail->ToString(), expected_detail); - } + ASSERT_OK(check_error(st, "some error", FormatPythonException("TypeError"))); ASSERT_TRUE(st.IsTypeError()); PyErr_SetString(PyExc_ValueError, "some error"); - // check_error(st); - st = CheckPyError(); - ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_OK(check_error(st)); ASSERT_TRUE(st.IsInvalid()); PyErr_SetString(PyExc_KeyError, "some error"); - // check_error(st, "'some error'"); - st = CheckPyError(); - ASSERT_EQ(st.message(), "'some error'"); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_OK(check_error(st, "'some error'")); ASSERT_TRUE(st.IsKeyError()); for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) { PyErr_SetString(exc_type, "some error"); - // check_error(st); - st = CheckPyError(); - ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_OK(check_error(st)); ASSERT_TRUE(st.IsIOError()); } PyErr_SetString(PyExc_NotImplementedError, "some error"); - // check_error(st, "some error", FormatPythonException("NotImplementedError")); - st = CheckPyError(); - expected_detail = FormatPythonException("NotImplementedError"); - ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); - if (expected_detail.size() > 0) { - auto detail = st.detail(); - ASSERT_NE(detail, nullptr); - ASSERT_EQ(detail->ToString(), expected_detail); - } + ASSERT_OK(check_error(st, "some error", FormatPythonException("NotImplementedError"))); ASSERT_TRUE(st.IsNotImplemented()); // No override if a specific status code is given @@ -259,6 +230,7 @@ Status TestPyBufferInvalidInputObject() { { Status st = PyBuffer::FromPyObject(input).status(); // ASSERT_TRUE(IsPyError(st)) << st.ToString(); + ASSERT_TRUE(IsPyError(st)); ASSERT_FALSE_PY(PyErr_Occurred()); } ASSERT_EQ(old_refcnt, Py_REFCNT(input)); From f4a955441e2e7741a8a2d657926e4a8abbbdba56 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Fri, 23 Sep 2022 09:05:45 +0200 Subject: [PATCH 09/18] Add a macro for ASSERT_TRUE that prints and extra message --- python/pyarrow/src/python_test.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index 4a1482f8436..18fce4a9be3 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -73,6 +73,13 @@ return Status::Invalid("Expected an error but did not got one."); \ } \ } +#define ASSERT_TRUE_MSG(error, msg){ \ + auto&& _err = (error); \ + auto&& _msg = (msg); \ + if (_err == false){ \ + return Status::Invalid("Expected an error but did not got one: ", _msg); \ + } \ +} #define ASSERT_OK(expr){ \ for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \ return Status::Invalid(expr, "failed with", _st.ToString()); \ @@ -229,8 +236,7 @@ Status TestPyBufferInvalidInputObject() { auto old_refcnt = Py_REFCNT(input); { Status st = PyBuffer::FromPyObject(input).status(); - // ASSERT_TRUE(IsPyError(st)) << st.ToString(); - ASSERT_TRUE(IsPyError(st)); + ASSERT_TRUE_MSG(IsPyError(st), st.ToString()); ASSERT_FALSE_PY(PyErr_Occurred()); } ASSERT_EQ(old_refcnt, Py_REFCNT(input)); From 10676fe3e64238736a7e7a81422340dbc73c0297 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Fri, 23 Sep 2022 12:35:36 +0200 Subject: [PATCH 10/18] Change Cython tests back to C++ with returning a Status --- python/pyarrow/_pyarrow_cpp_tests.pxd | 29 ++---- python/pyarrow/_pyarrow_cpp_tests.pyx | 92 ++---------------- python/pyarrow/src/python_test.cc | 131 +++++++++++++++++++++++++- python/pyarrow/src/python_test.h | 18 ++++ python/pyarrow/tests/test_python.py | 6 +- 5 files changed, 165 insertions(+), 111 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index 1defd350c1e..397e1d01afe 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -19,27 +19,7 @@ # cython: language_level = 3 from pyarrow.includes.common cimport * -from pyarrow.includes.libarrow cimport (c_string, CStatus) - -cdef extern from "arrow/python/decimal.h" namespace "arrow::py::internal" nogil: - - PyObject* DecimalFromString( - PyObject* decimal_constructor, - const c_string& decimal_string) - - CStatus PythonDecimalToString( - PyObject* python_decimal, - c_string* out) - - cdef cppclass DecimalMetadata: - DecimalMetadata() - DecimalMetadata(int32_t precision, int32_t scale) - - CStatus Update(int32_t suggested_precision, int32_t suggested_scale) - CStatus Update(PyObject* object) - - int32_t precision() - int32_t scale() +from pyarrow.includes.libarrow cimport CStatus cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: @@ -54,3 +34,10 @@ cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: CStatus TestPyBufferNumpyArray() CStatus TestNumPyBufferNumpyArray() # endif + + CStatus TestPythonDecimalToString() + CStatus TestInferPrecisionAndScale() + CStatus TestInferPrecisionAndNegativeScale() + CStatus TestInferAllLeadingZeros() + CStatus TestInferAllLeadingZerosExponentialNotationPositive() + CStatus TestInferAllLeadingZerosExponentialNotationNegative() diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 7e01525af3a..d3191071e09 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -59,98 +59,24 @@ def test_TestNumPyBufferNumpyArray(): # endif -def test_PythonDecimalToString(): - cdef: - c_string decimal_string = b'-39402950693754869342983' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - c_string string_result +def test_TestPythonDecimalToString(): + check_status(TestPythonDecimalToString()) - # Should a check be added here that python_object != nullptr ? +def test_TestInferPrecisionAndScale(): + check_status(TestInferPrecisionAndScale()) - check_status(PythonDecimalToString( - python_object, - &string_result) - ) - assert string_result == decimal_string - - -def test_InferPrecisionAndScale(): - cdef: - c_string decimal_string = b'-394029506937548693.42983' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - DecimalMetadata metadata - # 1 for -, 1 for . - int32_t expected_precision = (decimal_string.size()) - 2 - int32_t expected_scale = 5 - - check_status(metadata.Update( - python_object) - ) - - assert expected_precision == metadata.precision() - assert expected_scale == metadata.scale() - - -def test_InferPrecisionAndNegativeScale(): - cdef: - c_string decimal_string = b'-3.94042983E+10' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - DecimalMetadata metadata - int32_t expected_precision = 11 - int32_t expected_scale = 0 - - check_status(metadata.Update( - python_object) - ) - - assert expected_precision == metadata.precision() - assert expected_scale == metadata.scale() +def test_TestInferPrecisionAndNegativeScale(): + check_status(TestInferPrecisionAndNegativeScale()) def test_TestInferAllLeadingZeros(): - cdef: - c_string decimal_string = b'0.001' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - DecimalMetadata metadata - int32_t expected_precision = 3 - int32_t expected_scale = 3 - - check_status(metadata.Update( - python_object) - ) - - assert expected_precision == metadata.precision() - assert expected_scale == metadata.scale() + check_status(TestInferAllLeadingZeros()) def test_TestInferAllLeadingZerosExponentialNotationPositive(): - cdef: - c_string decimal_string = b'0.01E5' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - DecimalMetadata metadata - int32_t expected_precision = 4 - int32_t expected_scale = 0 - - check_status(metadata.Update( - python_object) - ) - - assert expected_precision == metadata.precision() - assert expected_scale == metadata.scale() + check_status(TestInferAllLeadingZerosExponentialNotationPositive()) def test_TestInferAllLeadingZerosExponentialNotationNegative(): - cdef: - c_string decimal_string = b'0.01E3' - PyObject* python_object = DecimalFromString(Decimal, decimal_string) - DecimalMetadata metadata - int32_t expected_precision = 2 - int32_t expected_scale = 0 - - check_status(metadata.Update( - python_object) - ) - - assert expected_precision == metadata.precision() - assert expected_scale == metadata.scale() + check_status(TestInferAllLeadingZerosExponentialNotationNegative()) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index 18fce4a9be3..003cbf4e5f9 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -254,9 +254,6 @@ Status TestPyBufferNumpyArray() { PyObject* arr = arr_ref.obj(); ASSERT_NE(arr, nullptr); auto old_refcnt = Py_REFCNT(arr); - - // ASSERT_OK_AND_ASSIGN(auto buf, PyBuffer::FromPyObject(arr)); - // ASSERT_OK(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__).status()) auto buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); ASSERT_TRUE(buf->is_cpu()); @@ -269,7 +266,6 @@ Status TestPyBufferNumpyArray() { // Read-only PyArray_CLEARFLAGS(reinterpret_cast(arr), NPY_ARRAY_WRITEABLE); - // ASSERT_OK_AND_ASSIGN(buf, PyBuffer::FromPyObject(arr)); buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); ASSERT_TRUE(buf->is_cpu()); ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); @@ -312,5 +308,132 @@ Status TestNumPyBufferNumpyArray() { } #endif +Status TestPythonDecimalToString(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("-39402950693754869342983"); + PyObject* python_object = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + ASSERT_NE(python_object, nullptr); + + std::string string_result; + ASSERT_OK(internal::PythonDecimalToString(python_object, &string_result)); + + return Status::OK(); +} + +Status TestInferPrecisionAndScale(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("-394029506937548693.42983"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + + const auto expected_precision = + static_cast(decimal_string.size() - 2); // 1 for -, 1 for . + const int32_t expected_scale = 5; + + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); + + return Status::OK(); +} + +Status TestInferPrecisionAndNegativeScale(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("-3.94042983E+10"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + + const auto expected_precision = 11; + const int32_t expected_scale = 0; + + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZeros(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("0.001"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(3, metadata.precision()); + ASSERT_EQ(3, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZerosExponentialNotationPositive(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("0.01E5"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(4, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZerosExponentialNotationNegative(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("0.01E3"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); + + return Status::OK(); +} + } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/python_test.h b/python/pyarrow/src/python_test.h index d472f51c50d..d3e4f11c4fa 100644 --- a/python/pyarrow/src/python_test.h +++ b/python/pyarrow/src/python_test.h @@ -49,5 +49,23 @@ ARROW_PYTHON_EXPORT Status TestNumPyBufferNumpyArray(); #endif +ARROW_PYTHON_EXPORT +Status TestPythonDecimalToString(); + +ARROW_PYTHON_EXPORT +Status TestInferPrecisionAndScale(); + +ARROW_PYTHON_EXPORT +Status TestInferPrecisionAndNegativeScale(); + +ARROW_PYTHON_EXPORT +Status TestInferAllLeadingZeros(); + +ARROW_PYTHON_EXPORT +Status TestInferAllLeadingZerosExponentialNotationPositive(); + +ARROW_PYTHON_EXPORT +Status TestInferAllLeadingZerosExponentialNotationNegative(); + } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index 16f8244a48f..31f71ea477b 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -75,15 +75,15 @@ def test_numpy_buffer_numpy_array(): def test_python_decimal_to_string(): - t.test_PythonDecimalToString() + t.test_TestPythonDecimalToString() def test_infer_precision_and_scale(): - t.test_InferPrecisionAndScale() + t.test_TestInferPrecisionAndScale() def test_infer_precision_and_negative_scale(): - t.test_InferPrecisionAndNegativeScale() + t.test_TestInferPrecisionAndNegativeScale() def test_infer_all_leading_zeros(): From 4818c5bfe794f5485a5c8c92a333cb1891983b75 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 26 Sep 2022 11:42:14 +0200 Subject: [PATCH 11/18] Add other decimal tests back to C++ and change them to return a Status --- python/pyarrow/_pyarrow_cpp_tests.pxd | 14 + python/pyarrow/_pyarrow_cpp_tests.pyx | 57 ++++ python/pyarrow/src/python_test.cc | 388 +++++++++++++++++++++++++- python/pyarrow/src/python_test.h | 42 +++ python/pyarrow/tests/test_python.py | 52 ++++ 5 files changed, 547 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index 397e1d01afe..b82c0a306ae 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -41,3 +41,17 @@ cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: CStatus TestInferAllLeadingZeros() CStatus TestInferAllLeadingZerosExponentialNotationPositive() CStatus TestInferAllLeadingZerosExponentialNotationNegative() + CStatus TestObjectBlockWriteFails() + CStatus TestMixedTypeFails() + CStatus TestFromPythonDecimalRescaleNotTruncateable() + CStatus TestFromPythonDecimalRescaleTruncateable() + CStatus TestFromPythonNegativeDecimalRescale() + CStatus TestDecimal128FromPythonInteger() + CStatus TestDecimal256FromPythonInteger() + CStatus TestDecimal128OverflowFails() + CStatus TestDecimal256OverflowFails() + CStatus TestNoneAndNaN() + CStatus TestMixedPrecisionAndScale() + CStatus TestMixedPrecisionAndScaleSequenceConvert() + CStatus TestSimpleInference() + CStatus TestUpdateWithNaN() diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index d3191071e09..84bdafb7401 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -62,6 +62,7 @@ def test_TestNumPyBufferNumpyArray(): def test_TestPythonDecimalToString(): check_status(TestPythonDecimalToString()) + def test_TestInferPrecisionAndScale(): check_status(TestInferPrecisionAndScale()) @@ -80,3 +81,59 @@ def test_TestInferAllLeadingZerosExponentialNotationPositive(): def test_TestInferAllLeadingZerosExponentialNotationNegative(): check_status(TestInferAllLeadingZerosExponentialNotationNegative()) + + +def test_TestObjectBlockWriteFails(): + check_status(TestObjectBlockWriteFails()) + + +def test_TestMixedTypeFails(): + check_status(TestMixedTypeFails()) + + +def test_TestFromPythonDecimalRescaleNotTruncateable(): + check_status(TestFromPythonDecimalRescaleNotTruncateable()) + + +def test_TestFromPythonDecimalRescaleTruncateable(): + check_status(TestFromPythonDecimalRescaleTruncateable()) + + +def test_TestFromPythonNegativeDecimalRescale(): + check_status(TestFromPythonNegativeDecimalRescale()) + + +def test_TestDecimal128FromPythonInteger(): + check_status(TestDecimal128FromPythonInteger()) + + +def test_TestDecimal256FromPythonInteger(): + check_status(TestDecimal256FromPythonInteger()) + + +def test_TestDecimal128OverflowFails(): + check_status(TestDecimal128OverflowFails()) + + +def test_TestDecimal256OverflowFails(): + check_status(TestDecimal256OverflowFails()) + + +def test_TestNoneAndNaN(): + check_status(TestNoneAndNaN()) + + +def test_TestMixedPrecisionAndScale(): + check_status(TestMixedPrecisionAndScale()) + + +def test_TestMixedPrecisionAndScaleSequenceConvert(): + check_status(TestMixedPrecisionAndScaleSequenceConvert()) + + +def test_TestSimpleInference(): + check_status(TestSimpleInference()) + + +def test_TestUpdateWithNaN(): + check_status(TestUpdateWithNaN()) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index 003cbf4e5f9..afe263963d5 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -52,25 +52,25 @@ #define ASSERT_FALSE_PY(error){ \ auto&& _err = (error); \ if (_err != NULL){ \ - return Status::Invalid("An error occurred: ", _err); \ + return Status::Invalid("Expected no error, but got one"); \ } \ } #define ASSERT_TRUE_PY(error){ \ auto&& _err = (error); \ if (_err == NULL){ \ - return Status::Invalid("Expected an error but did not got one."); \ + return Status::Invalid("Expected an error but did not get one."); \ } \ } #define ASSERT_FALSE(error){ \ auto&& _err = (error); \ if (_err != false){ \ - return Status::Invalid("An error occurred: ", _err); \ + return Status::Invalid("Expected no error, but got one"); \ } \ } #define ASSERT_TRUE(error){ \ auto&& _err = (error); \ if (_err == false){ \ - return Status::Invalid("Expected an error but did not got one."); \ + return Status::Invalid("Expected an error but did not get one."); \ } \ } #define ASSERT_TRUE_MSG(error, msg){ \ @@ -82,11 +82,18 @@ } #define ASSERT_OK(expr){ \ for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \ - return Status::Invalid(expr, "failed with", _st.ToString()); \ + return Status::Invalid(expr, " failed with ", _st.ToString()); \ +} +#define ASSERT_RAISES(st, expr){ \ + auto&& _st = (st); \ + for (::arrow::Status _st_expr = ::arrow::internal::GenericToStatus((expr)); \ + _st_expr.code() != _st.code();) \ + return Status::Invalid("Expected to fail with ", _st.ToString()); \ } - namespace arrow { +using internal::checked_cast; + namespace py { Status TestOwnedRefMoves() { @@ -435,5 +442,374 @@ Status TestInferAllLeadingZerosExponentialNotationNegative(){ return Status::OK(); } +Status TestObjectBlockWriteFails(){ + StringBuilder builder; + const char value[] = {'\xf1', '\0'}; + + for (int i = 0; i < 1000; ++i) { + ASSERT_OK(builder.Append(value, static_cast(strlen(value)))); + } + + std::shared_ptr arr; + ASSERT_OK(builder.Finish(&arr)); + + auto f1 = field("f1", utf8()); + auto f2 = field("f2", utf8()); + auto f3 = field("f3", utf8()); + std::vector> fields = {f1, f2, f3}; + std::vector> cols = {arr, arr, arr}; + + auto schema = ::arrow::schema(fields); + auto table = Table::Make(schema, cols); + + Status st; + Py_BEGIN_ALLOW_THREADS; + PyObject* out; + PandasOptions options; + options.use_threads = true; + st = ConvertTableToPandas(options, table, &out); + Py_END_ALLOW_THREADS; + ASSERT_RAISES(::arrow::Status::UnknownError(""), st); + + return Status::OK(); +} + +Status TestMixedTypeFails(){ + OwnedRef list_ref(PyList_New(3)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + PyObject* str = PyUnicode_FromString("abc"); + ASSERT_NE(str, nullptr); + + PyObject* integer = PyLong_FromLong(1234L); + ASSERT_NE(integer, nullptr); + + PyObject* doub = PyFloat_FromDouble(123.0234); + ASSERT_NE(doub, nullptr); + + // This steals a reference to each object, so we don't need to decref them later + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, str), 0); + ASSERT_EQ(PyList_SetItem(list, 1, integer), 0); + ASSERT_EQ(PyList_SetItem(list, 2, doub), 0); + + ASSERT_RAISES(::arrow::Status::TypeError(""), + ConvertPySequence(list, nullptr, {})); + + return Status::OK(); +} + +template +Status DecimalTestFromPythonDecimalRescale(std::shared_ptr type, + PyObject* python_decimal, + std::optional expected) { + DecimalValue value; + const auto& decimal_type = checked_cast(*type); + + if (expected.has_value()) { + ASSERT_OK( + internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); + ASSERT_EQ(expected.value(), value); + + ASSERT_OK(internal::DecimalFromPyObject(python_decimal, decimal_type, &value)); + ASSERT_EQ(expected.value(), value); + } else { + ASSERT_RAISES(::arrow::Status::Invalid(""), + internal::DecimalFromPythonDecimal(python_decimal, + decimal_type, &value)); + ASSERT_RAISES(::arrow::Status::Invalid(""), + internal::DecimalFromPyObject(python_decimal, + decimal_type, &value)); + } + return Status::OK(); +} + +Status TestFromPythonDecimalRescaleNotTruncateable(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("1.001"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + // We fail when truncating values that would lose data if cast to a decimal type with + // lower scale + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal128(10, 2), + python_decimal, {})); + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal256(10, 2), + python_decimal, {})); + + return Status::OK(); +} + +Status TestFromPythonDecimalRescaleTruncateable(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("1.000"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + // We allow truncation of values that do not lose precision when dividing by 10 * the + // difference between the scales, e.g., 1.000 -> 1.00 + ASSERT_OK(DecimalTestFromPythonDecimalRescale( + ::arrow::decimal128(10, 2), python_decimal, 100)); + ASSERT_OK(DecimalTestFromPythonDecimalRescale( + ::arrow::decimal256(10, 2), python_decimal, 100)); + + return Status::OK(); +} + +Status TestFromPythonNegativeDecimalRescale(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("-1.000"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + ASSERT_OK(DecimalTestFromPythonDecimalRescale( + ::arrow::decimal128(10, 9), python_decimal, -1000000000)); + ASSERT_OK(DecimalTestFromPythonDecimalRescale( + ::arrow::decimal256(10, 9), python_decimal, -1000000000)); + + return Status::OK(); +} + +Status TestDecimal128FromPythonInteger(){ + Decimal128 value; + OwnedRef python_long(PyLong_FromLong(42)); + auto type = ::arrow::decimal128(10, 2); + const auto& decimal_type = checked_cast(*type); + ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); + ASSERT_EQ(4200, value); + return Status::OK(); +} + +Status TestDecimal256FromPythonInteger(){ + Decimal256 value; + OwnedRef python_long(PyLong_FromLong(42)); + auto type = ::arrow::decimal256(10, 2); + const auto& decimal_type = checked_cast(*type); + ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); + ASSERT_EQ(4200, value); + return Status::OK(); +} + +Status TestDecimal128OverflowFails(){ + Decimal128 value; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("9999999999999999999999999999999999999.9"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(38, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); + + auto type = ::arrow::decimal(38, 38); + const auto& decimal_type = checked_cast(*type); + ASSERT_RAISES(::arrow::Status::Invalid(""), + internal::DecimalFromPythonDecimal(python_decimal, + decimal_type, &value)); + return Status::OK(); +} + +Status TestDecimal256OverflowFails(){ + Decimal256 value; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("999999999999999999999999999999999999999999999999999999999999999999999999999.9"); + PyObject* python_decimal = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(76, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); + + auto type = ::arrow::decimal(76, 76); + const auto& decimal_type = checked_cast(*type); + ASSERT_RAISES(::arrow::Status::Invalid(""), + internal::DecimalFromPythonDecimal(python_decimal, + decimal_type, &value)); + return Status::OK(); +} + +Status TestNoneAndNaN(){ + OwnedRef list_ref(PyList_New(4)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + PyObject* constructor = decimal_constructor_.obj(); + PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234"); + ASSERT_NE(decimal_value, nullptr); + + Py_INCREF(Py_None); + PyObject* missing_value1 = Py_None; + ASSERT_NE(missing_value1, nullptr); + + PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN); + ASSERT_NE(missing_value2, nullptr); + + PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan"); + ASSERT_NE(missing_value3, nullptr); + + // This steals a reference to each object, so we don't need to decref them later, + // just the list + ASSERT_EQ(0, PyList_SetItem(list, 0, decimal_value)); + ASSERT_EQ(0, PyList_SetItem(list, 1, missing_value1)); + ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2)); + ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3)); + + PyConversionOptions options; + ASSERT_RAISES(::arrow::Status::TypeError(""), + ConvertPySequence(list, nullptr, options)); + + options.from_pandas = true; + auto chunked = std::move(ConvertPySequence(list, nullptr, options)).ValueOrDie(); + ASSERT_EQ(chunked->num_chunks(), 1); + + auto arr = chunked->chunk(0); + ASSERT_TRUE(arr->IsValid(0)); + ASSERT_TRUE(arr->IsNull(1)); + ASSERT_TRUE(arr->IsNull(2)); + ASSERT_TRUE(arr->IsNull(3)); + + return Status::OK(); +} + +Status TestMixedPrecisionAndScale(){ + std::vector strings{{"0.001", "1.01E5", "1.01E5"}}; + + OwnedRef list_ref(PyList_New(static_cast(strings.size()))); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + // PyList_SetItem steals a reference to the item so we don't decref it later + PyObject* decimal_constructor = decimal_constructor_.obj(); + for (Py_ssize_t i = 0; i < static_cast(strings.size()); ++i) { + const int result = PyList_SetItem( + list, i, internal::DecimalFromString(decimal_constructor, strings.at(i))); + ASSERT_EQ(0, result); + } + + auto arr = std::move(ConvertPySequence(list, nullptr, {})).ValueOrDie(); + const auto& type = checked_cast(*arr->type()); + + int32_t expected_precision = 9; + int32_t expected_scale = 3; + ASSERT_EQ(expected_precision, type.precision()); + ASSERT_EQ(expected_scale, type.scale()); + + return Status::OK(); +} + +Status TestMixedPrecisionAndScaleSequenceConvert(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string_1("0.01"); + PyObject* value1 = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string_1); + ASSERT_NE(value1, nullptr); + + std::string decimal_string_2("0.001"); + PyObject* value2 = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string_2); + ASSERT_NE(value2, nullptr); + + OwnedRef list_ref(PyList_New(2)); + PyObject* list = list_ref.obj(); + + // This steals a reference to each object, so we don't need to decref them later + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, value1), 0); + ASSERT_EQ(PyList_SetItem(list, 1, value2), 0); + + auto arr = std::move(ConvertPySequence(list, nullptr, {})).ValueOrDie(); + const auto& type = checked_cast(*arr->type()); + ASSERT_EQ(3, type.precision()); + ASSERT_EQ(3, type.scale()); + + return Status::OK(); +} + +Status TestSimpleInference(){ + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + + std::string decimal_string("0.01"); + PyObject* value = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + ASSERT_NE(value, nullptr); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(value)); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(2, metadata.scale()); + + return Status::OK(); +} + +Status TestUpdateWithNaN(){ + internal::DecimalMetadata metadata; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(internal::ImportFromModule(decimal_module.obj(), "Decimal", + &decimal_constructor_)); + std::string decimal_string("nan"); + PyObject* nan_value = internal::DecimalFromString(decimal_constructor_.obj(), + decimal_string); + + ASSERT_OK(metadata.Update(nan_value)); + ASSERT_EQ(std::numeric_limits::min(), metadata.precision()); + ASSERT_EQ(std::numeric_limits::min(), metadata.scale()); + + return Status::OK(); +} + } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/python_test.h b/python/pyarrow/src/python_test.h index d3e4f11c4fa..4a1a16af9a9 100644 --- a/python/pyarrow/src/python_test.h +++ b/python/pyarrow/src/python_test.h @@ -67,5 +67,47 @@ Status TestInferAllLeadingZerosExponentialNotationPositive(); ARROW_PYTHON_EXPORT Status TestInferAllLeadingZerosExponentialNotationNegative(); +ARROW_PYTHON_EXPORT +Status TestObjectBlockWriteFails(); + +ARROW_PYTHON_EXPORT +Status TestMixedTypeFails(); + +ARROW_PYTHON_EXPORT +Status TestFromPythonDecimalRescaleNotTruncateable(); + +ARROW_PYTHON_EXPORT +Status TestFromPythonDecimalRescaleTruncateable(); + +ARROW_PYTHON_EXPORT +Status TestFromPythonNegativeDecimalRescale(); + +ARROW_PYTHON_EXPORT +Status TestDecimal128FromPythonInteger(); + +ARROW_PYTHON_EXPORT +Status TestDecimal256FromPythonInteger(); + +ARROW_PYTHON_EXPORT +Status TestDecimal128OverflowFails(); + +ARROW_PYTHON_EXPORT +Status TestDecimal256OverflowFails(); + +ARROW_PYTHON_EXPORT +Status TestNoneAndNaN(); + +ARROW_PYTHON_EXPORT +Status TestMixedPrecisionAndScale(); + +ARROW_PYTHON_EXPORT +Status TestMixedPrecisionAndScaleSequenceConvert(); + +ARROW_PYTHON_EXPORT +Status TestSimpleInference(); + +ARROW_PYTHON_EXPORT +Status TestUpdateWithNaN(); + } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index 31f71ea477b..ad5aabed8b3 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -96,3 +96,55 @@ def test_infer_all_leading_zeros_e_pos(): def test_infer_all_leading_zeros_e_neg(): t.test_TestInferAllLeadingZerosExponentialNotationNegative() + + +def test_object_block_write_fails(): + t.test_TestObjectBlockWriteFails() + + +def test_mixed_type_fails(): + t.test_TestMixedTypeFails() + + +def test_from_python_decimal_rescale_not_truncateable(): + t.test_TestFromPythonDecimalRescaleNotTruncateable() + + +def test_from_python_decimal_rescale_truncateable(): + t.test_TestFromPythonDecimalRescaleTruncateable() + + +def test_from_python_negative_decimal_rescale(): + t.test_TestFromPythonNegativeDecimalRescale() + + +def test_decimal_128_from_python_integer(): + t.test_TestDecimal128FromPythonInteger() + + +def test_decimal_256_from_python_integer(): + t.test_TestDecimal256FromPythonInteger() + + +def test_decimal_128_overflow_fails(): + t.test_TestDecimal128OverflowFails() + + +def test_decimal_256_overflow_fails(): + t.test_TestDecimal256OverflowFails() + + +def test_none_and_nan(): + t.test_TestNoneAndNaN() + + +def test_mixed_precision_and_scale(): + t.test_TestMixedPrecisionAndScale() + + +def test_simple_inference(): + t.test_TestSimpleInference() + + +def test_update_with_nan(): + t.test_TestUpdateWithNaN() From cea188b236a2dec210cc85e2ef2808064751edfc Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 26 Sep 2022 13:27:27 +0200 Subject: [PATCH 12/18] Try to fix invalid conversion error --- python/pyarrow/src/python_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index afe263963d5..f847b81ce9e 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -255,7 +255,7 @@ Status TestPyBufferInvalidInputObject() { // ("unresolved external symbol arrow_ARRAY_API referenced"). #ifndef _WIN32 Status TestPyBufferNumpyArray() { - const npy_intp dims[1] = {10}; + npy_intp dims[1] = {10}; OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT)); PyObject* arr = arr_ref.obj(); From ffda7c229c6bfc16d7afdca547c674040cbd3725 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 26 Sep 2022 13:28:29 +0200 Subject: [PATCH 13/18] Try different approach to skip test on win --- python/pyarrow/tests/test_python.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index ad5aabed8b3..6bfcf8bf2cf 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -60,17 +60,17 @@ def test_py_buffer_invalid_input_object(): t.test_TestPyBufferInvalidInputObject() -@pytest.mark.skipif(sys.platform == 'win32', - reason="C++ test are skipped on Windows due to " - "the Numpy C API instance not being visible") def test_py_buffer_numpy_array(): + if sys.platform == 'win32': + pytest.skip("C++ test are skipped on Windows due to " + "the Numpy C API instance not being visible") t.test_TestPyBufferNumpyArray() -@pytest.mark.skipif(sys.platform == 'win32', - reason="C++ test are skipped on Windows due to " - "the Numpy C API instance not being visible") def test_numpy_buffer_numpy_array(): + if sys.platform == 'win32': + pytest.skip("C++ test are skipped on Windows due to " + "the Numpy C API instance not being visible") t.test_TestNumPyBufferNumpyArray() From ba5081aad470255d5d92c6078ca7a7a0e22914bc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 26 Sep 2022 15:41:57 +0200 Subject: [PATCH 14/18] Automate C++ test collection --- python/pyarrow/_pyarrow_cpp_tests.pxd | 38 ++----- python/pyarrow/_pyarrow_cpp_tests.pyx | 157 +++++++------------------- python/pyarrow/src/python_test.cc | 48 +++++++- python/pyarrow/src/python_test.h | 97 +++------------- python/pyarrow/tests/test_python.py | 141 ++--------------------- 5 files changed, 119 insertions(+), 362 deletions(-) diff --git a/python/pyarrow/_pyarrow_cpp_tests.pxd b/python/pyarrow/_pyarrow_cpp_tests.pxd index b82c0a306ae..91c0220d731 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pxd +++ b/python/pyarrow/_pyarrow_cpp_tests.pxd @@ -21,37 +21,13 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport CStatus -cdef extern from "arrow/python/python_test.h" namespace "arrow::py" nogil: - CStatus TestOwnedRefMoves() - CStatus TestOwnedRefNoGILMoves() - CStatus TestCheckPyErrorStatus() - CStatus TestCheckPyErrorStatusNoGIL() - CStatus TestRestorePyErrorBasics() - CStatus TestPyBufferInvalidInputObject() +ctypedef CStatus cb_test_func() - # ifdef _WIN32 - CStatus TestPyBufferNumpyArray() - CStatus TestNumPyBufferNumpyArray() - # endif +cdef extern from "arrow/python/python_test.h" namespace "arrow::py::testing" nogil: - CStatus TestPythonDecimalToString() - CStatus TestInferPrecisionAndScale() - CStatus TestInferPrecisionAndNegativeScale() - CStatus TestInferAllLeadingZeros() - CStatus TestInferAllLeadingZerosExponentialNotationPositive() - CStatus TestInferAllLeadingZerosExponentialNotationNegative() - CStatus TestObjectBlockWriteFails() - CStatus TestMixedTypeFails() - CStatus TestFromPythonDecimalRescaleNotTruncateable() - CStatus TestFromPythonDecimalRescaleTruncateable() - CStatus TestFromPythonNegativeDecimalRescale() - CStatus TestDecimal128FromPythonInteger() - CStatus TestDecimal256FromPythonInteger() - CStatus TestDecimal128OverflowFails() - CStatus TestDecimal256OverflowFails() - CStatus TestNoneAndNaN() - CStatus TestMixedPrecisionAndScale() - CStatus TestMixedPrecisionAndScaleSequenceConvert() - CStatus TestSimpleInference() - CStatus TestUpdateWithNaN() + cdef cppclass CTestCase "arrow::py::testing::TestCase": + c_string name + cb_test_func func + + vector[CTestCase] GetCppTestCases() diff --git a/python/pyarrow/_pyarrow_cpp_tests.pyx b/python/pyarrow/_pyarrow_cpp_tests.pyx index 84bdafb7401..adb14835130 100644 --- a/python/pyarrow/_pyarrow_cpp_tests.pyx +++ b/python/pyarrow/_pyarrow_cpp_tests.pyx @@ -20,120 +20,43 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * -from pyarrow.lib cimport (check_status) - -from decimal import Decimal - - -def test_TestOwnedRefMoves(): - check_status(TestOwnedRefMoves()) - - -def test_TestOwnedRefNoGILMoves(): - check_status(TestOwnedRefNoGILMoves()) - - -def test_TestCheckPyErrorStatus(): - check_status(TestCheckPyErrorStatus()) - - -def test_TestCheckPyErrorStatusNoGIL(): - check_status(TestCheckPyErrorStatusNoGIL()) - - -def test_TestRestorePyErrorBasics(): - check_status(TestRestorePyErrorBasics()) - - -def test_TestPyBufferInvalidInputObject(): - check_status(TestPyBufferInvalidInputObject()) - - -# ifdef _WIN32 -def test_TestPyBufferNumpyArray(): - check_status(TestPyBufferNumpyArray()) - - -def test_TestNumPyBufferNumpyArray(): - check_status(TestNumPyBufferNumpyArray()) -# endif - - -def test_TestPythonDecimalToString(): - check_status(TestPythonDecimalToString()) - - -def test_TestInferPrecisionAndScale(): - check_status(TestInferPrecisionAndScale()) - - -def test_TestInferPrecisionAndNegativeScale(): - check_status(TestInferPrecisionAndNegativeScale()) - - -def test_TestInferAllLeadingZeros(): - check_status(TestInferAllLeadingZeros()) - - -def test_TestInferAllLeadingZerosExponentialNotationPositive(): - check_status(TestInferAllLeadingZerosExponentialNotationPositive()) - - -def test_TestInferAllLeadingZerosExponentialNotationNegative(): - check_status(TestInferAllLeadingZerosExponentialNotationNegative()) - - -def test_TestObjectBlockWriteFails(): - check_status(TestObjectBlockWriteFails()) - - -def test_TestMixedTypeFails(): - check_status(TestMixedTypeFails()) - - -def test_TestFromPythonDecimalRescaleNotTruncateable(): - check_status(TestFromPythonDecimalRescaleNotTruncateable()) - - -def test_TestFromPythonDecimalRescaleTruncateable(): - check_status(TestFromPythonDecimalRescaleTruncateable()) - - -def test_TestFromPythonNegativeDecimalRescale(): - check_status(TestFromPythonNegativeDecimalRescale()) - - -def test_TestDecimal128FromPythonInteger(): - check_status(TestDecimal128FromPythonInteger()) - - -def test_TestDecimal256FromPythonInteger(): - check_status(TestDecimal256FromPythonInteger()) - - -def test_TestDecimal128OverflowFails(): - check_status(TestDecimal128OverflowFails()) - - -def test_TestDecimal256OverflowFails(): - check_status(TestDecimal256OverflowFails()) - - -def test_TestNoneAndNaN(): - check_status(TestNoneAndNaN()) - - -def test_TestMixedPrecisionAndScale(): - check_status(TestMixedPrecisionAndScale()) - - -def test_TestMixedPrecisionAndScaleSequenceConvert(): - check_status(TestMixedPrecisionAndScaleSequenceConvert()) - - -def test_TestSimpleInference(): - check_status(TestSimpleInference()) - - -def test_TestUpdateWithNaN(): - check_status(TestUpdateWithNaN()) +from pyarrow.lib cimport check_status + +from pyarrow.lib import frombytes + + +cdef class CppTestCase: + """ + A simple wrapper for a C++ test case. + """ + cdef: + CTestCase c_case + + @staticmethod + cdef wrap(CTestCase c_case): + cdef: + CppTestCase obj + obj = CppTestCase.__new__(CppTestCase) + obj.c_case = c_case + return obj + + @property + def name(self): + return frombytes(self.c_case.name) + + def __repr__(self): + return f"<{self.__class__.__name__} {self.name!r}>" + + def __call__(self): + check_status(self.c_case.func()) + + +def get_cpp_tests(): + """ + Get a list of C++ test cases. + """ + cases = [] + c_cases = GetCppTestCases() + for c_case in c_cases: + cases.append(CppTestCase.wrap(c_case)) + return cases diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index f847b81ce9e..9a7e6497dd8 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -26,14 +26,15 @@ #include "arrow/array/builder_binary.h" #include "arrow/table.h" #include "arrow/util/decimal.h" +#include "arrow/util/logging.h" #include "arrow_to_pandas.h" #include "decimal.h" #include "helpers.h" #include "numpy_convert.h" #include "numpy_interop.h" +#include "python_test.h" #include "python_to_arrow.h" -#include "arrow/util/logging.h" #define ASSERT_EQ(x, y) { \ auto&& _left = (x); \ @@ -95,6 +96,8 @@ namespace arrow { using internal::checked_cast; namespace py { +namespace testing { +namespace { Status TestOwnedRefMoves() { std::vector vec; @@ -811,5 +814,48 @@ Status TestUpdateWithNaN(){ return Status::OK(); } +} // namespace + +std::vector GetCppTestCases() { + return { + {"test_owned_ref_moves", TestOwnedRefMoves}, + {"test_owned_ref_nogil_moves", TestOwnedRefNoGILMoves}, + {"test_check_pyerror_status", TestCheckPyErrorStatus}, + {"test_check_pyerror_status_nogil", TestCheckPyErrorStatusNoGIL}, + {"test_restore_pyerror_basics", TestRestorePyErrorBasics}, + {"test_pybuffer_invalid_input_object", TestPyBufferInvalidInputObject}, +#ifndef _WIN32 + {"test_pybuffer_numpy_array", TestPyBufferNumpyArray}, + {"test_numpybuffer_numpy_array", TestNumPyBufferNumpyArray}, +#endif + {"test_python_decimal_to_string", TestPythonDecimalToString}, + {"test_infer_precision_and_scale", TestInferPrecisionAndScale}, + {"test_infer_precision_and_negative_scale", TestInferPrecisionAndNegativeScale}, + {"test_infer_all_leading_zeros", TestInferAllLeadingZeros}, + {"test_infer_all_leading_zeros_exponential_notation_positive", + TestInferAllLeadingZerosExponentialNotationPositive}, + {"test_infer_all_leading_zeros_exponential_notation_negative", + TestInferAllLeadingZerosExponentialNotationNegative}, + {"test_object_block_write_fails", TestObjectBlockWriteFails}, + {"test_mixed_type_fails", TestMixedTypeFails}, + {"test_from_python_decimal_rescale_not_truncateable", + TestFromPythonDecimalRescaleNotTruncateable}, + {"test_from_python_decimal_rescale_truncateable", + TestFromPythonDecimalRescaleTruncateable}, + {"test_from_python_negative_decimal_rescale", TestFromPythonNegativeDecimalRescale}, + {"test_decimal128_from_python_integer", TestDecimal128FromPythonInteger}, + {"test_decimal256_from_python_integer", TestDecimal256FromPythonInteger}, + {"test_decimal128_overflow_fails", TestDecimal128OverflowFails}, + {"test_decimal256_overflow_fails", TestDecimal256OverflowFails}, + {"test_none_and_nan", TestNoneAndNaN}, + {"test_mixed_precision_and_scale", TestMixedPrecisionAndScale}, + {"test_mixed_precision_and_scale_sequence_convert", + TestMixedPrecisionAndScaleSequenceConvert}, + {"test_simple_inference", TestSimpleInference}, + {"test_update_with_nan", TestUpdateWithNaN}, + }; +} + +} // namespace testing } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/python_test.h b/python/pyarrow/src/python_test.h index 4a1a16af9a9..db0f7ed3134 100644 --- a/python/pyarrow/src/python_test.h +++ b/python/pyarrow/src/python_test.h @@ -17,97 +17,26 @@ #pragma once +#include +#include +#include + +#include "arrow/status.h" + #include "visibility.h" -#include "arrow/type.h" namespace arrow { namespace py { +namespace testing { -ARROW_PYTHON_EXPORT -Status TestOwnedRefMoves(); - -ARROW_PYTHON_EXPORT -Status TestOwnedRefNoGILMoves(); - -ARROW_PYTHON_EXPORT -Status TestCheckPyErrorStatus(); - -ARROW_PYTHON_EXPORT -Status TestCheckPyErrorStatusNoGIL(); - -ARROW_PYTHON_EXPORT -Status TestRestorePyErrorBasics(); - -ARROW_PYTHON_EXPORT -Status TestPyBufferInvalidInputObject(); - -#ifndef _WIN32 -ARROW_PYTHON_EXPORT -Status TestPyBufferNumpyArray(); - -ARROW_PYTHON_EXPORT -Status TestNumPyBufferNumpyArray(); -#endif - -ARROW_PYTHON_EXPORT -Status TestPythonDecimalToString(); - -ARROW_PYTHON_EXPORT -Status TestInferPrecisionAndScale(); - -ARROW_PYTHON_EXPORT -Status TestInferPrecisionAndNegativeScale(); - -ARROW_PYTHON_EXPORT -Status TestInferAllLeadingZeros(); - -ARROW_PYTHON_EXPORT -Status TestInferAllLeadingZerosExponentialNotationPositive(); - -ARROW_PYTHON_EXPORT -Status TestInferAllLeadingZerosExponentialNotationNegative(); - -ARROW_PYTHON_EXPORT -Status TestObjectBlockWriteFails(); - -ARROW_PYTHON_EXPORT -Status TestMixedTypeFails(); - -ARROW_PYTHON_EXPORT -Status TestFromPythonDecimalRescaleNotTruncateable(); - -ARROW_PYTHON_EXPORT -Status TestFromPythonDecimalRescaleTruncateable(); - -ARROW_PYTHON_EXPORT -Status TestFromPythonNegativeDecimalRescale(); - -ARROW_PYTHON_EXPORT -Status TestDecimal128FromPythonInteger(); - -ARROW_PYTHON_EXPORT -Status TestDecimal256FromPythonInteger(); - -ARROW_PYTHON_EXPORT -Status TestDecimal128OverflowFails(); - -ARROW_PYTHON_EXPORT -Status TestDecimal256OverflowFails(); - -ARROW_PYTHON_EXPORT -Status TestNoneAndNaN(); - -ARROW_PYTHON_EXPORT -Status TestMixedPrecisionAndScale(); - -ARROW_PYTHON_EXPORT -Status TestMixedPrecisionAndScaleSequenceConvert(); - -ARROW_PYTHON_EXPORT -Status TestSimpleInference(); +struct TestCase { + std::string name; + std::function func; +}; ARROW_PYTHON_EXPORT -Status TestUpdateWithNaN(); +std::vector GetCppTestCases(); +} // namespace testing } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_python.py index 6bfcf8bf2cf..1c44bff9b5b 100644 --- a/python/pyarrow/tests/test_python.py +++ b/python/pyarrow/tests/test_python.py @@ -15,136 +15,19 @@ # specific language governing permissions and limitations # under the License. -# import importlib -# import sys +from pyarrow._pyarrow_cpp_tests import get_cpp_tests -# # list of Cython modules containing tests -# cython_test_modules = ["pyarrow._pyarrow_cpp_tests"] -# for mod in cython_test_modules: -# # For each callable in `mod` with name `test_*`, -# # set the result as an attribute of this module. -# mod = importlib.import_module(mod) -# for name in dir(mod): -# item = getattr(mod, name) -# if callable(item) and name.startswith("test_"): -# setattr(sys.modules[__name__], name, item) +def inject_cpp_tests(ns): + """ + Inject C++ tests as Python functions into namespace `ns` (a dict). + """ + for case in get_cpp_tests(): + def wrapper(case=case): + case() + wrapper.__name__ = wrapper.__qualname__ = case.name + wrapper.__module__ = ns['__name__'] + ns[case.name] = wrapper -import pyarrow._pyarrow_cpp_tests as t # noqa -import pytest -import sys - - -def test_owned_ref_moves(): - t.test_TestOwnedRefMoves() - - -def test_owned_ref_no_gil_moves(): - t.test_TestOwnedRefNoGILMoves() - - -def test_check_py_error_status(): - t.test_TestCheckPyErrorStatus() - - -def test_check_py_error_status_no_gil(): - t.test_TestCheckPyErrorStatusNoGIL() - - -def test_restore_py_error_basics(): - t.test_TestRestorePyErrorBasics() - - -def test_py_buffer_invalid_input_object(): - t.test_TestPyBufferInvalidInputObject() - - -def test_py_buffer_numpy_array(): - if sys.platform == 'win32': - pytest.skip("C++ test are skipped on Windows due to " - "the Numpy C API instance not being visible") - t.test_TestPyBufferNumpyArray() - - -def test_numpy_buffer_numpy_array(): - if sys.platform == 'win32': - pytest.skip("C++ test are skipped on Windows due to " - "the Numpy C API instance not being visible") - t.test_TestNumPyBufferNumpyArray() - - -def test_python_decimal_to_string(): - t.test_TestPythonDecimalToString() - - -def test_infer_precision_and_scale(): - t.test_TestInferPrecisionAndScale() - - -def test_infer_precision_and_negative_scale(): - t.test_TestInferPrecisionAndNegativeScale() - - -def test_infer_all_leading_zeros(): - t.test_TestInferAllLeadingZeros() - - -def test_infer_all_leading_zeros_e_pos(): - t.test_TestInferAllLeadingZerosExponentialNotationPositive() - - -def test_infer_all_leading_zeros_e_neg(): - t.test_TestInferAllLeadingZerosExponentialNotationNegative() - - -def test_object_block_write_fails(): - t.test_TestObjectBlockWriteFails() - - -def test_mixed_type_fails(): - t.test_TestMixedTypeFails() - - -def test_from_python_decimal_rescale_not_truncateable(): - t.test_TestFromPythonDecimalRescaleNotTruncateable() - - -def test_from_python_decimal_rescale_truncateable(): - t.test_TestFromPythonDecimalRescaleTruncateable() - - -def test_from_python_negative_decimal_rescale(): - t.test_TestFromPythonNegativeDecimalRescale() - - -def test_decimal_128_from_python_integer(): - t.test_TestDecimal128FromPythonInteger() - - -def test_decimal_256_from_python_integer(): - t.test_TestDecimal256FromPythonInteger() - - -def test_decimal_128_overflow_fails(): - t.test_TestDecimal128OverflowFails() - - -def test_decimal_256_overflow_fails(): - t.test_TestDecimal256OverflowFails() - - -def test_none_and_nan(): - t.test_TestNoneAndNaN() - - -def test_mixed_precision_and_scale(): - t.test_TestMixedPrecisionAndScale() - - -def test_simple_inference(): - t.test_TestSimpleInference() - - -def test_update_with_nan(): - t.test_TestUpdateWithNaN() +inject_cpp_tests(globals()) From 002bb71e368bba5d045c0fbd8c6d58ec9520898a Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 26 Sep 2022 16:01:04 +0200 Subject: [PATCH 15/18] Improve assertions --- python/pyarrow/src/python_test.cc | 96 ++++++++++++++++--------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/python/pyarrow/src/python_test.cc b/python/pyarrow/src/python_test.cc index 9a7e6497dd8..844d1b8cccb 100644 --- a/python/pyarrow/src/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -40,57 +40,62 @@ auto&& _left = (x); \ auto&& _right = (y); \ if (_left != _right) { \ - return Status::Invalid("Expected equality but ", _left, " != ", _right); \ + return Status::Invalid("Expected equality between `", #x, "` and `", #y, \ + "`, but ", _left, " != ", _right); \ } \ } -#define ASSERT_NE(x, y){ \ + +#define ASSERT_NE(x, y) { \ auto&& _left = (x); \ auto&& _right = (y); \ if (_left == _right) { \ - return Status::Invalid("Expected inequality but ", _left, " == ", _right); \ - } \ -} -#define ASSERT_FALSE_PY(error){ \ - auto&& _err = (error); \ - if (_err != NULL){ \ - return Status::Invalid("Expected no error, but got one"); \ + return Status::Invalid("Expected inequality between `", #x, "` and `", #y, \ + "`, but ", _left, " == ", _right); \ } \ } -#define ASSERT_TRUE_PY(error){ \ - auto&& _err = (error); \ - if (_err == NULL){ \ - return Status::Invalid("Expected an error but did not get one."); \ + +#define ASSERT_FALSE(v) { \ + auto&& _v = (v); \ + if (!!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to false, but got ", _v); \ } \ } -#define ASSERT_FALSE(error){ \ - auto&& _err = (error); \ - if (_err != false){ \ - return Status::Invalid("Expected no error, but got one"); \ + +#define ASSERT_TRUE(v){ \ + auto&& _v = (v); \ + if (!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to true, but got ", _v); \ } \ } -#define ASSERT_TRUE(error){ \ - auto&& _err = (error); \ - if (_err == false){ \ - return Status::Invalid("Expected an error but did not get one."); \ + +#define ASSERT_FALSE_MSG(v, msg) { \ + auto&& _v = (v); \ + if (!!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to false, but got ", \ + _v, ": ", msg); \ } \ } -#define ASSERT_TRUE_MSG(error, msg){ \ - auto&& _err = (error); \ - auto&& _msg = (msg); \ - if (_err == false){ \ - return Status::Invalid("Expected an error but did not got one: ", _msg); \ + +#define ASSERT_TRUE_MSG(v, msg) { \ + auto&& _v = (v); \ + if (!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to true, but got ", \ + _v, ": ", msg); \ } \ } -#define ASSERT_OK(expr){ \ + +#define ASSERT_OK(expr) { \ for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \ - return Status::Invalid(expr, " failed with ", _st.ToString()); \ + return Status::Invalid("`", #expr, "` failed with ", _st.ToString()); \ } -#define ASSERT_RAISES(st, expr){ \ - auto&& _st = (st); \ + +#define ASSERT_RAISES(code, expr) { \ for (::arrow::Status _st_expr = ::arrow::internal::GenericToStatus((expr)); \ - _st_expr.code() != _st.code();) \ - return Status::Invalid("Expected to fail with ", _st.ToString()); \ + !_st_expr.Is##code();) \ + return Status::Invalid("Expected `", #expr, "` to fail with ", \ + #code, ", but got ", _st_expr.ToString()); \ } + namespace arrow { using internal::checked_cast; @@ -156,7 +161,7 @@ Status TestCheckPyErrorStatus() { std::string expected_detail = "") { st = CheckPyError(); ASSERT_EQ(st.message(), expected_message); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_FALSE(PyErr_Occurred()); if (expected_detail.size() > 0) { auto detail = st.detail(); ASSERT_NE(detail, nullptr); @@ -198,7 +203,7 @@ Status TestCheckPyErrorStatus() { st = CheckPyError(StatusCode::SerializationError); ASSERT_TRUE(st.IsSerializationError()); ASSERT_EQ(st.message(), "some error"); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_FALSE(PyErr_Occurred()); return Status::OK(); } @@ -209,7 +214,7 @@ Status TestCheckPyErrorStatusNoGIL() { Status st; PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); st = ConvertPyError(); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_FALSE(PyErr_Occurred()); lock.release(); ASSERT_TRUE(st.IsUnknownError()); ASSERT_EQ(st.message(), "zzzt"); @@ -221,13 +226,13 @@ Status TestCheckPyErrorStatusNoGIL() { Status TestRestorePyErrorBasics() { PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); auto st = ConvertPyError(); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_FALSE(PyErr_Occurred()); ASSERT_TRUE(st.IsUnknownError()); ASSERT_EQ(st.message(), "zzzt"); ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError")); RestorePyError(st); - ASSERT_TRUE_PY(PyErr_Occurred()); + ASSERT_TRUE(PyErr_Occurred()); PyObject* exc_type; PyObject* exc_value; PyObject* exc_traceback; @@ -247,7 +252,7 @@ Status TestPyBufferInvalidInputObject() { { Status st = PyBuffer::FromPyObject(input).status(); ASSERT_TRUE_MSG(IsPyError(st), st.ToString()); - ASSERT_FALSE_PY(PyErr_Occurred()); + ASSERT_FALSE(PyErr_Occurred()); } ASSERT_EQ(old_refcnt, Py_REFCNT(input)); return Status::OK(); @@ -472,7 +477,7 @@ Status TestObjectBlockWriteFails(){ options.use_threads = true; st = ConvertTableToPandas(options, table, &out); Py_END_ALLOW_THREADS; - ASSERT_RAISES(::arrow::Status::UnknownError(""), st); + ASSERT_RAISES(UnknownError, st); return Status::OK(); } @@ -498,8 +503,7 @@ Status TestMixedTypeFails(){ ASSERT_EQ(PyList_SetItem(list, 1, integer), 0); ASSERT_EQ(PyList_SetItem(list, 2, doub), 0); - ASSERT_RAISES(::arrow::Status::TypeError(""), - ConvertPySequence(list, nullptr, {})); + ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, {})); return Status::OK(); } @@ -519,10 +523,10 @@ Status DecimalTestFromPythonDecimalRescale(std::shared_ptr type, ASSERT_OK(internal::DecimalFromPyObject(python_decimal, decimal_type, &value)); ASSERT_EQ(expected.value(), value); } else { - ASSERT_RAISES(::arrow::Status::Invalid(""), + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); - ASSERT_RAISES(::arrow::Status::Invalid(""), + ASSERT_RAISES(Invalid, internal::DecimalFromPyObject(python_decimal, decimal_type, &value)); } @@ -629,7 +633,7 @@ Status TestDecimal128OverflowFails(){ auto type = ::arrow::decimal(38, 38); const auto& decimal_type = checked_cast(*type); - ASSERT_RAISES(::arrow::Status::Invalid(""), + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); return Status::OK(); @@ -655,7 +659,7 @@ Status TestDecimal256OverflowFails(){ auto type = ::arrow::decimal(76, 76); const auto& decimal_type = checked_cast(*type); - ASSERT_RAISES(::arrow::Status::Invalid(""), + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); return Status::OK(); @@ -694,7 +698,7 @@ Status TestNoneAndNaN(){ ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3)); PyConversionOptions options; - ASSERT_RAISES(::arrow::Status::TypeError(""), + ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, options)); options.from_pandas = true; From 9204b5c6763b2070a35ef60f74871ba77fb5c47e Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 27 Sep 2022 09:40:02 +0200 Subject: [PATCH 16/18] Remove redundant ignore warning from setup.cfg --- python/setup.cfg | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/setup.cfg b/python/setup.cfg index cb9d0cfca58..062ce2745d8 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -28,9 +28,6 @@ build-dir = doc/_build addopts = --ignore=scripts filterwarnings = error:The SparseDataFrame:FutureWarning - # ignore warnings when Cython functions are called - # from python tests - ignore::pytest.PytestCollectionWarning # Get a debug traceback when a test takes a really long time faulthandler_timeout = 300 From 1c2d8d6535ac6dfd78fbf03b911bc3b512590394 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 27 Sep 2022 10:57:06 +0200 Subject: [PATCH 17/18] Change test_python.py to test_cpp_internals.py --- python/pyarrow/tests/{test_python.py => test_cpp_internals.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/pyarrow/tests/{test_python.py => test_cpp_internals.py} (100%) diff --git a/python/pyarrow/tests/test_python.py b/python/pyarrow/tests/test_cpp_internals.py similarity index 100% rename from python/pyarrow/tests/test_python.py rename to python/pyarrow/tests/test_cpp_internals.py From be21035a4aa86a099bb813b5d82f10b8ed0bab6c Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 28 Sep 2022 10:11:39 +0200 Subject: [PATCH 18/18] Remove python/pyarrow/src/util/ folder as it is not needed anymore --- python/pyarrow/src/util/CMakeLists.txt | 32 -------------------- python/pyarrow/src/util/test_main.cc | 41 -------------------------- 2 files changed, 73 deletions(-) delete mode 100644 python/pyarrow/src/util/CMakeLists.txt delete mode 100644 python/pyarrow/src/util/test_main.cc diff --git a/python/pyarrow/src/util/CMakeLists.txt b/python/pyarrow/src/util/CMakeLists.txt deleted file mode 100644 index 74141bebc8b..00000000000 --- a/python/pyarrow/src/util/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# -# arrow/python_test_main -# - -if(PYARROW_BUILD_TESTS) - add_library(arrow/python_test_main STATIC test_main.cc) - - if(APPLE) - target_link_libraries(arrow/python_test_main GTest::gtest dl) - set_target_properties(arrow/python_test_main PROPERTIES LINK_FLAGS - "-undefined dynamic_lookup") - else() - target_link_libraries(arrow/python_test_main GTest::gtest pthread dl) - endif() -endif() diff --git a/python/pyarrow/src/util/test_main.cc b/python/pyarrow/src/util/test_main.cc deleted file mode 100644 index 3ee1657e644..00000000000 --- a/python/pyarrow/src/util/test_main.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "../platform.h" - -#include - -#include "../datetime.h" -#include "../init.h" -#include "../pyarrow.h" - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - - Py_Initialize(); - int ret = arrow_init_numpy(); - if (ret != 0) { - return ret; - } - ::arrow::py::internal::InitDatetime(); - - ret = RUN_ALL_TESTS(); - - Py_Finalize(); - - return ret; -}