From 432a41e9ee7022b3226e71abd6240278b772bbb5 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 31 Jul 2025 16:18:49 +0200 Subject: [PATCH 1/5] add regression test --- python/pyarrow/tests/test_compute.py | 42 ++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ad61dbc48a7..8f7768eb2c0 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1952,6 +1952,48 @@ def test_fill_null_chunked_array(arrow_type): assert result.equals(expected) +def test_fill_null_windows_regression1(): + arr = pa.array([True, False, False, False, False, None]) + s = pa.scalar(True, type=pa.bool_()) + + result = pa.compute.call_function("coalesce", [arr, s]) + expected = pa.array([True, False, False, False, False, True]) + + assert result.equals(expected) + + +def test_fill_null_windows_regression2(): + arr = pa.array([True, False, False, False, False, None]) + s = True + + result = pa.compute.call_function("coalesce", [arr, s]) + expected = pa.array([True, False, False, False, False, True]) + + assert result.equals(expected) + + +def test_fill_null_windows_regression4(): + ty = pa.int64() + arr = pa.array([1, 2, 2, 2, 2, None], type=ty) + s = pa.scalar(1, type=ty) + + result = pa.compute.call_function("coalesce", [arr, s]) + expected = pa.array([1, 2, 2, 2, 2, 1], type=ty) + + assert result.equals(expected) + + +def test_fill_null_windows_regression5(): + ty = pa.int32() + arr = pa.array([1, 2, 2, 2, 2, None], type=ty) + s = pa.scalar(1, type=ty) + + result = pa.compute.call_function("coalesce", [arr, s]) + expected = pa.array([1, 2, 2, 2, 2, 1], type=ty) + + assert result.equals(expected) + + def test_logical(): a = pa.array([True, False, False, None]) b = pa.array([True, True, False, True]) From 8f0d2e0b7028245902d826e440dc530487cb6cd1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 31 Jul 2025 17:05:54 +0200 Subject: [PATCH 2/5] add cpp regression test --- cpp/src/arrow/compute/kernels/scalar_if_else_test.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index e007a16d13b..eaa1c67996f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -3144,6 +3144,12 @@ TEST(TestCoalesce, Boolean) { ArrayFromJSON(type, "[true, true, false, true]")); CheckScalar("coalesce", {scalar1, values1}, ArrayFromJSON(type, "[false, false, false, false]")); + + // Regression test from https://github.com/apache/arrow/issues/47234 + auto values_with_null = ArrayFromJSON(type, "[true, false, false, false, false, null]"); + auto expected = ArrayFromJSON(type, "[true, false, false, false, false, true]"); + auto scalar2 = ScalarFromJSON(type, "true"); + CheckScalar("coalesce", {values_with_null, scalar2}, expected); } TEST(TestCoalesce, DayTimeInterval) { From aa5fbf9ae4c58c072a1ce89476d40bf20438e5c0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 18 Aug 2025 10:39:13 +0200 Subject: [PATCH 3/5] More insightful test vectors --- python/pyarrow/tests/test_compute.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 8f7768eb2c0..c528937b11c 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1974,22 +1974,22 @@ def test_fill_null_windows_regression2(): def test_fill_null_windows_regression4(): ty = pa.int64() - arr = pa.array([1, 2, 2, 2, 2, None], type=ty) - s = pa.scalar(1, type=ty) + arr = pa.array([1, 2, 3, 4, 5, None], type=ty) + s = pa.scalar(42, type=ty) result = pa.compute.call_function("coalesce", [arr, s]) - expected = pa.array([1, 2, 2, 2, 2, 1], type=ty) + expected = pa.array([1, 2, 3, 4, 5, 42], type=ty) assert result.equals(expected) def test_fill_null_windows_regression5(): ty = pa.int32() - arr = pa.array([1, 2, 2, 2, 2, None], type=ty) - s = pa.scalar(1, type=ty) + arr = pa.array([1, 2, 3, 4, 5, None], type=ty) + s = pa.scalar(42, type=ty) result = pa.compute.call_function("coalesce", [arr, s]) - expected = pa.array([1, 2, 2, 2, 2, 1], type=ty) + expected = pa.array([1, 2, 3, 4, 5, 42], type=ty) assert result.equals(expected) From b4d8b6f2e11a9579ddf91dc14d075ea1cd6a9ba7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 18 Aug 2025 14:54:48 +0200 Subject: [PATCH 4/5] Add low-level logs --- ci/scripts/python_wheel_windows_test.bat | 2 +- cpp/src/arrow/compute/kernels/copy_data_internal.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index a686215b93d..da027dc1a66 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -59,4 +59,4 @@ py -0p %PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1 @REM Execute unittest -%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 +%PYTHON_CMD% -m pytest -r s --pyargs pyarrow.tests.test_compute || exit /B 1 diff --git a/cpp/src/arrow/compute/kernels/copy_data_internal.h b/cpp/src/arrow/compute/kernels/copy_data_internal.h index 735c2034c3f..93093ace8e9 100644 --- a/cpp/src/arrow/compute/kernels/copy_data_internal.h +++ b/cpp/src/arrow/compute/kernels/copy_data_internal.h @@ -30,6 +30,7 @@ template <> struct CopyDataUtils { static void CopyData(const DataType&, const Scalar& in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { + ARROW_LOG(INFO) << "Boolean CopyData from Scalar to offset " << out_offset << " with length " << length; bit_util::SetBitsTo( out, out_offset, length, in.is_valid ? checked_cast(in).value : false); @@ -37,6 +38,7 @@ struct CopyDataUtils { static void CopyData(const DataType&, const uint8_t* in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { + ARROW_LOG(INFO) << "Boolean CopyData from Array at offset " << in_offset << " to offset " << out_offset << " with length " << length; arrow::internal::CopyBitmap(in, in_offset, length, out, out_offset); } @@ -91,11 +93,13 @@ struct CopyDataUtils< uint8_t* out, const int64_t out_offset, const int64_t length) { CType* begin = reinterpret_cast(out) + out_offset; CType* end = begin + length; + ARROW_LOG(INFO) << "CopyData from Scalar to offset " << out_offset << " with length " << length; std::fill(begin, end, UnboxScalar::Unbox(in)); } static void CopyData(const DataType&, const uint8_t* in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { + ARROW_LOG(INFO) << "CopyData from Array at offset " << in_offset << " to offset " << out_offset << " with length " << length; std::memcpy(out + out_offset * sizeof(CType), in + in_offset * sizeof(CType), length * sizeof(CType)); } From 24e90ddb2f06603ef61c8a89506ba4e9b0ad5564 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 15 Sep 2025 17:15:48 +0200 Subject: [PATCH 5/5] Remove temporary changes --- ci/scripts/python_wheel_windows_test.bat | 2 +- .../compute/kernels/copy_data_internal.h | 4 -- .../compute/kernels/scalar_if_else_test.cc | 4 +- python/pyarrow/tests/test_compute.py | 44 +++++-------------- 4 files changed, 16 insertions(+), 38 deletions(-) diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index da027dc1a66..a686215b93d 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -59,4 +59,4 @@ py -0p %PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1 @REM Execute unittest -%PYTHON_CMD% -m pytest -r s --pyargs pyarrow.tests.test_compute || exit /B 1 +%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/cpp/src/arrow/compute/kernels/copy_data_internal.h b/cpp/src/arrow/compute/kernels/copy_data_internal.h index 93093ace8e9..735c2034c3f 100644 --- a/cpp/src/arrow/compute/kernels/copy_data_internal.h +++ b/cpp/src/arrow/compute/kernels/copy_data_internal.h @@ -30,7 +30,6 @@ template <> struct CopyDataUtils { static void CopyData(const DataType&, const Scalar& in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { - ARROW_LOG(INFO) << "Boolean CopyData from Scalar to offset " << out_offset << " with length " << length; bit_util::SetBitsTo( out, out_offset, length, in.is_valid ? checked_cast(in).value : false); @@ -38,7 +37,6 @@ struct CopyDataUtils { static void CopyData(const DataType&, const uint8_t* in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { - ARROW_LOG(INFO) << "Boolean CopyData from Array at offset " << in_offset << " to offset " << out_offset << " with length " << length; arrow::internal::CopyBitmap(in, in_offset, length, out, out_offset); } @@ -93,13 +91,11 @@ struct CopyDataUtils< uint8_t* out, const int64_t out_offset, const int64_t length) { CType* begin = reinterpret_cast(out) + out_offset; CType* end = begin + length; - ARROW_LOG(INFO) << "CopyData from Scalar to offset " << out_offset << " with length " << length; std::fill(begin, end, UnboxScalar::Unbox(in)); } static void CopyData(const DataType&, const uint8_t* in, const int64_t in_offset, uint8_t* out, const int64_t out_offset, const int64_t length) { - ARROW_LOG(INFO) << "CopyData from Array at offset " << in_offset << " to offset " << out_offset << " with length " << length; std::memcpy(out + out_offset * sizeof(CType), in + in_offset * sizeof(CType), length * sizeof(CType)); } diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index eaa1c67996f..196912679ba 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -3145,7 +3145,9 @@ TEST(TestCoalesce, Boolean) { CheckScalar("coalesce", {scalar1, values1}, ArrayFromJSON(type, "[false, false, false, false]")); - // Regression test from https://github.com/apache/arrow/issues/47234 + // Regression test for GH-47234, which was failing due to a MSVC compiler bug + // (possibly https://developercommunity.visualstudio.com/t/10912292 + // or https://developercommunity.visualstudio.com/t/10945478). auto values_with_null = ArrayFromJSON(type, "[true, false, false, false, false, null]"); auto expected = ArrayFromJSON(type, "[true, false, false, false, false, true]"); auto scalar2 = ScalarFromJSON(type, "true"); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c528937b11c..5441dd493d3 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1952,46 +1952,26 @@ def test_fill_null_chunked_array(arrow_type): assert result.equals(expected) -def test_fill_null_windows_regression1(): +def test_fill_null_windows_regression(): + # Regression test for GH-47234, which was failing due to a MSVC compiler bug + # (possibly https://developercommunity.visualstudio.com/t/10912292 + # or https://developercommunity.visualstudio.com/t/10945478) arr = pa.array([True, False, False, False, False, None]) s = pa.scalar(True, type=pa.bool_()) result = pa.compute.call_function("coalesce", [arr, s]) + result.validate(full=True) expected = pa.array([True, False, False, False, False, True]) - - assert result.equals(expected) - - -def test_fill_null_windows_regression2(): - arr = pa.array([True, False, False, False, False, None]) - s = True - - result = pa.compute.call_function("coalesce", [arr, s]) - expected = pa.array([True, False, False, False, False, True]) - assert result.equals(expected) + for ty in [pa.int8(), pa.int16(), pa.int32(), pa.int64()]: + arr = pa.array([1, 2, 3, 4, 5, None], type=ty) + s = pa.scalar(42, type=ty) -def test_fill_null_windows_regression4(): - ty = pa.int64() - arr = pa.array([1, 2, 3, 4, 5, None], type=ty) - s = pa.scalar(42, type=ty) - - result = pa.compute.call_function("coalesce", [arr, s]) - expected = pa.array([1, 2, 3, 4, 5, 42], type=ty) - - assert result.equals(expected) - - -def test_fill_null_windows_regression5(): - ty = pa.int32() - arr = pa.array([1, 2, 3, 4, 5, None], type=ty) - s = pa.scalar(42, type=ty) - - result = pa.compute.call_function("coalesce", [arr, s]) - expected = pa.array([1, 2, 3, 4, 5, 42], type=ty) - - assert result.equals(expected) + result = pa.compute.call_function("coalesce", [arr, s]) + result.validate(full=True) + expected = pa.array([1, 2, 3, 4, 5, 42], type=ty) + assert result.equals(expected) def test_logical():