diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index c17e70823d5..a382f766333 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -594,9 +594,20 @@ Status NumPyConverter::Visit(const FixedSizeBinaryType& type) {
 
   if (mask_ != nullptr) {
     Ndarray1DIndexer mask_values(mask_);
-    RETURN_NOT_OK(builder.AppendValues(data, length_, mask_values.data()));
+    RETURN_NOT_OK(builder.Reserve(length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      if (mask_values[i]) {
+        RETURN_NOT_OK(builder.AppendNull());
+      } else {
+        RETURN_NOT_OK(builder.Append(data));
+      }
+      data += stride_;
+    }
   } else {
-    RETURN_NOT_OK(builder.AppendValues(data, length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      RETURN_NOT_OK(builder.Append(data));
+      data += stride_;
+    }
   }
 
   std::shared_ptr result;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 086ed4cb160..30500bc3c5b 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2714,6 +2714,51 @@ def test_array_masked():
     assert arr.type == pa.int64()
 
 
+def test_binary_array_masked():
+    # ARROW-12431
+    masked_basic = pa.array([b'\x05'], type=pa.binary(1),
+                            mask=np.array([False]))
+    assert [b'\x05'] == masked_basic.to_pylist()
+
+    # Fixed Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Variable Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Fixed Length Binary, copy
+    npa = np.array([b'aaa', b'bbb', b'ccc']*10)
+    arrow_array = pa.array(npa, type=pa.binary(3),
+                           mask=np.array([False, False, False]*10))
+    npa[npa == b"bbb"] = b"XXX"
+    assert ([b'aaa', b'bbb', b'ccc']*10) == arrow_array.to_pylist()
+
+
+def test_binary_array_strided():
+    # Masked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2),
+                           mask=np.array([False, False]))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+    # Unmasked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+
 def test_array_invalid_mask_raises():
     # ARROW-10742
     cases = [
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 77c18b839c6..7f904433fa2 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1705,7 +1705,7 @@ def test_numpy_string_array_to_fixed_size_binary(self):
         expected = pa.array(list(arr), type=pa.binary(3))
         assert converted.equals(expected)
 
-        mask = np.array([True, False, True])
+        mask = np.array([False, True, False])
         converted = pa.array(arr, type=pa.binary(3), mask=mask)
         expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3))
         assert converted.equals(expected)