Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ if(ARROW_COMPUTE)
compute/kernels/scalar_validity.cc
compute/kernels/util_internal.cc
compute/kernels/vector_hash.cc
compute/kernels/vector_nested.cc
compute/kernels/vector_selection.cc
compute/kernels/vector_sort.cc)
endif()
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
add_arrow_compute_test(vector_test
SOURCES
vector_hash_test.cc
vector_nested_test.cc
vector_selection_test.cc
vector_sort_test.cc
test_util.cc)
Expand Down
56 changes: 56 additions & 0 deletions cpp/src/arrow/compute/kernels/vector_nested.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Vector kernels involving nested types

#include "arrow/array/array_base.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/result.h"

namespace arrow {
namespace compute {
namespace internal {

// Kernel exec function backing "list_flatten".
//
// Views the first batch argument as the concrete list array class associated
// with `Type` and calls its Flatten() method, allocating from the kernel
// context's memory pool.
//
// On failure the error status is reported through `ctx` and `out` is left
// untouched; on success `out->value` receives the flattened array's data.
template <typename Type>
void ListFlatten(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
  using ListArrayType = typename TypeTraits<Type>::ArrayType;

  ListArrayType lists(batch[0].array());
  Result<std::shared_ptr<Array>> flattened = lists.Flatten(ctx->memory_pool());
  if (flattened.ok()) {
    out->value = (*flattened)->data();
  } else {
    // Surface the failure via the context; this exec function returns void.
    ctx->SetStatus(flattened.status());
  }
}

// Output type resolver for the list_flatten kernels.
//
// Returns an array-shaped ValueDescr whose type is the value type of the
// first argument's list type. The cast to BaseListType relies on the first
// argument being a list-like type; the kernels registered in this file only
// accept LIST and LARGE_LIST inputs.
static Result<ValueDescr> ValuesType(KernelContext*,
                                     const std::vector<ValueDescr>& args) {
  const ValueDescr& input_descr = args[0];
  const auto& as_list = checked_cast<const BaseListType&>(*input_descr.type);
  return ValueDescr::Array(as_list.value_type());
}

// Registers the unary vector function "list_flatten" in `registry`, with one
// kernel for LIST inputs and one for LARGE_LIST inputs. Both kernels resolve
// their output type through ValuesType.
void RegisterVectorNested(FunctionRegistry* registry) {
  auto list_flatten_func =
      std::make_shared<VectorFunction>("list_flatten", Arity::Unary());

  // Both kernels share the same output type resolver.
  const OutputType values_out(ValuesType);

  DCHECK_OK(list_flatten_func->AddKernel({InputType(Type::LIST)}, values_out,
                                         ListFlatten<ListType>));
  DCHECK_OK(list_flatten_func->AddKernel({InputType(Type::LARGE_LIST)}, values_out,
                                         ListFlatten<LargeListType>));

  DCHECK_OK(registry->AddFunction(std::move(list_flatten_func)));
}

} // namespace internal
} // namespace compute
} // namespace arrow
38 changes: 38 additions & 0 deletions cpp/src/arrow/compute/kernels/vector_nested_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include "arrow/compute/api.h"
#include "arrow/compute/kernels/test_util.h"
#include "arrow/result.h"
#include "arrow/testing/gtest_util.h"

namespace arrow {
namespace compute {

// Calls "list_flatten" on both list(int32) and large_list(int32) inputs.
// The fixture contains a null list and an empty list, neither of which
// contributes values to the expected output, while the null element inside
// the first sub-list is expected to be preserved.
TEST(TestVectorNested, ListFlatten) {
  for (const auto& list_type : {list(int32()), large_list(int32())}) {
    auto expected_values = ArrayFromJSON(int32(), "[0, null, 1, 2, 3]");
    auto nested = ArrayFromJSON(list_type, "[[0, null, 1], null, [2, 3], []]");

    ASSERT_OK_AND_ASSIGN(Datum flattened, CallFunction("list_flatten", {nested}));
    AssertArraysEqual(*expected_values, *flattened.make_array());
  }
}

} // namespace compute
} // namespace arrow
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
// Vector functions
RegisterVectorHash(registry.get());
RegisterVectorSelection(registry.get());
RegisterVectorNested(registry.get());
RegisterVectorSort(registry.get());

return registry;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/registry_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ void RegisterScalarValidity(FunctionRegistry* registry);
// Vector functions
void RegisterVectorHash(FunctionRegistry* registry);
void RegisterVectorSelection(FunctionRegistry* registry);
void RegisterVectorNested(FunctionRegistry* registry);
void RegisterVectorSort(FunctionRegistry* registry);

// Aggregate functions
Expand Down
85 changes: 21 additions & 64 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1342,8 +1342,27 @@ cdef class Decimal128Array(FixedSizeBinaryArray):
Concrete class for Arrow arrays of decimal128 data type.
"""

cdef class BaseListArray(Array):

cdef class ListArray(Array):
def flatten(self):
"""
Unnest this ListArray/LargeListArray by one level.

The returned Array is logically a concatenation of all the sub-lists
in this Array.

Note that this method is different from ``self.values()`` in that
it takes care of the slicing offset as well as null elements backed
by non-empty sub-lists.

Returns
-------
result : Array
"""
return _pc().list_flatten(self)


cdef class ListArray(BaseListArray):
"""
Concrete class for Arrow arrays of a list data type.
"""
Expand Down Expand Up @@ -1389,39 +1408,8 @@ cdef class ListArray(Array):
"""
return pyarrow_wrap_array((<CListArray*> self.ap).offsets())

def flatten(self, MemoryPool memory_pool=None):
"""
Unnest this ListArray by one level.

The returned Array is logically a concatenation of all the sub-lists
in this Array.

Note that this method is different from ``self.values()`` in that
it takes care of the slicing offset as well as null elements backed
by non-empty sub-lists.

Parameters
----------
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the currently-set default
memory pool

Returns
-------
result : Array
"""
cdef:
shared_ptr[CArray] c_result_array
CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool)
CListArray* arr = <CListArray*> self.ap

with nogil:
c_result_array = GetResultValue(arr.Flatten(cpool))

return pyarrow_wrap_array(c_result_array)


cdef class LargeListArray(Array):
cdef class LargeListArray(BaseListArray):
"""
Concrete class for Arrow arrays of a large list data type.

Expand Down Expand Up @@ -1470,37 +1458,6 @@ cdef class LargeListArray(Array):
"""
return pyarrow_wrap_array((<CLargeListArray*> self.ap).offsets())

def flatten(self, MemoryPool memory_pool=None):
"""
Unnest this LargeListArray by one level.

The returned Array is logically a concatenation of all the sub-lists
in this Array.

Note that this method is different from ``self.values()`` in that
it takes care of the slicing offset as well as null elements backed
by non-empty sub-lists.

Parameters
----------
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the currently-set default
memory pool.

Returns
-------
result : Array
"""
cdef:
shared_ptr[CArray] c_result_array
CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool)
CLargeListArray* arr = <CLargeListArray*> self.ap

with nogil:
c_result_array = GetResultValue(arr.Flatten(cpool))

return pyarrow_wrap_array(c_result_array)


cdef class MapArray(Array):
"""
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def func(left, right):
is_valid = _simple_unary_function('is_valid')
is_null = _simple_unary_function('is_null')

list_flatten = _simple_unary_function('list_flatten')

add = _simple_binary_function('add')
subtract = _simple_binary_function('subtract')
multiply = _simple_binary_function('multiply')
Expand Down
2 changes: 0 additions & 2 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,6 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int32_t value_length(int i)
shared_ptr[CArray] values()
shared_ptr[CArray] offsets()
CResult[shared_ptr[CArray]] Flatten(CMemoryPool* memory_pool)
shared_ptr[CDataType] value_type()

cdef cppclass CLargeListArray" arrow::LargeListArray"(CArray):
Expand All @@ -531,7 +530,6 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int64_t value_length(int i)
shared_ptr[CArray] values()
shared_ptr[CArray] offsets()
CResult[shared_ptr[CArray]] Flatten(CMemoryPool* memory_pool)
shared_ptr[CDataType] value_type()

cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray):
Expand Down
8 changes: 6 additions & 2 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,15 @@ cdef class StructArray(Array):
pass


cdef class ListArray(Array):
cdef class BaseListArray(Array):
pass


cdef class LargeListArray(Array):
cdef class ListArray(BaseListArray):
pass


cdef class LargeListArray(BaseListArray):
pass


Expand Down