Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1789,12 +1789,26 @@ cdef class BinaryArray(Array):
"""
Concrete class for Arrow arrays of variable-sized binary data type.
"""
@property
def total_values_length(self):
    """
    Size in bytes of the stretch of the data buffer, from its beginning
    to its end, that the offsets of this BinaryArray address.
    """
    # View the wrapped array pointer as the concrete binary array type
    # so the C++ accessor can be called on it.
    cdef CBinaryArray* binary_arr = <CBinaryArray*> self.ap
    return binary_arr.total_values_length()


cdef class LargeBinaryArray(Array):
    """
    Concrete class for Arrow arrays of large variable-sized binary data type.
    """

    @property
    def total_values_length(self):
        """
        Size in bytes of the stretch of the data buffer, from its beginning
        to its end, that the offsets of this LargeBinaryArray address.
        """
        # View the wrapped array pointer as the concrete large binary
        # array type so the C++ accessor can be called on it.
        cdef CLargeBinaryArray* large_arr = <CLargeBinaryArray*> self.ap
        return large_arr.total_values_length()


cdef class DictionaryArray(Array):
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -593,12 +593,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
shared_ptr[CBuffer] value_data()
int32_t value_offset(int64_t i)
int32_t value_length(int64_t i)
int32_t total_values_length()

cdef cppclass CLargeBinaryArray" arrow::LargeBinaryArray"(CArray):
    # Cython declarations for arrow::LargeBinaryArray; offsets and
    # lengths are declared as int64_t here.
    #
    # The element index of GetValue is int64_t, like value_offset and
    # value_length below: declaring it as a plain `int` would narrow
    # 64-bit indices at the call site.
    const uint8_t* GetValue(int64_t i, int64_t* length)
    shared_ptr[CBuffer] value_data()
    int64_t value_offset(int64_t i)
    int64_t value_length(int64_t i)
    int64_t total_values_length()

cdef cppclass CStringArray" arrow::StringArray"(CBinaryArray):
CStringArray(int64_t length, shared_ptr[CBuffer] value_offsets,
Expand Down
12 changes: 12 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ def test_binary_format():
assert result == expected


def test_binary_total_values_length():
    """Check total_values_length on binary and large_binary arrays.

    The same values (one of them null) are used for both types; the
    property is checked on the full array and on a three-element slice.
    """
    values = [b'0000', None, b'11111', b'222222', b'3333333']

    for type_name in ('binary', 'large_binary'):
        arr = pa.array(values, type=type_name)

        # 4 + 5 + 6 + 7 bytes; the null entry contributes nothing.
        assert arr.total_values_length == 22
        # Slice covers [None, b'11111', b'222222'] -> 0 + 5 + 6 bytes.
        assert arr.slice(1, 3).total_values_length == 11


def test_to_numpy_zero_copy():
arr = pa.array(range(10))

Expand Down