Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,6 @@ list(APPEND
compute/exec/hash_join.cc
compute/exec/hash_join_dict.cc
compute/exec/hash_join_node.cc
compute/exec/key_hash.cc
compute/exec/key_map.cc
compute/exec/map_node.cc
compute/exec/options.cc
compute/exec/order_by_impl.cc
Expand All @@ -420,6 +418,8 @@ list(APPEND
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
compute/key_hash.cc
compute/key_map.cc
compute/light_array.cc
compute/ordering.cc
compute/registry.cc
Expand All @@ -439,11 +439,12 @@ list(APPEND
compute/row/encode_internal.cc
compute/row/compare_internal.cc
compute/row/grouper.cc
compute/row/row_internal.cc)
compute/row/row_internal.cc
compute/util.cc)

append_avx2_src(compute/exec/bloom_filter_avx2.cc)
append_avx2_src(compute/exec/key_hash_avx2.cc)
append_avx2_src(compute/exec/key_map_avx2.cc)
append_avx2_src(compute/key_hash_avx2.cc)
append_avx2_src(compute/key_map_avx2.cc)
append_avx2_src(compute/exec/swiss_join_avx2.cc)
append_avx2_src(compute/exec/util_avx2.cc)
append_avx2_src(compute/row/compare_internal_avx2.cc)
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/compute/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ add_arrow_compute_test(internals_test
exec_test.cc
kernel_test.cc
light_array_test.cc
registry_test.cc)
registry_test.cc
key_hash_test.cc)

add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")

Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/compute/exec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,13 @@ add_arrow_compute_test(hash_join_node_test
"arrow-compute"
SOURCES
hash_join_node_test.cc
bloom_filter_test.cc
key_hash_test.cc)
bloom_filter_test.cc)
add_arrow_compute_test(pivot_longer_node_test
PREFIX
"arrow-compute"
SOURCES
pivot_longer_node_test.cc
test_nodes.cc)

add_arrow_compute_test(asof_join_node_test
REQUIRE_ALL_KERNELS
PREFIX
Expand All @@ -71,6 +69,11 @@ add_arrow_compute_test(util_test
SOURCES
util_test.cc
task_util_test.cc)
add_arrow_compute_test(light_array_exec_test
PREFIX
"arrow-compute"
SOURCES
light_array_exec_test.cc)

add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/exec/exec_plan.h"
#include "arrow/compute/exec/key_hash.h"
#include "arrow/compute/exec/options.h"
#include "arrow/compute/exec/query_context.h"
#include "arrow/compute/exec/schema_util.h"
#include "arrow/compute/exec/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/light_array.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec/bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
#include <thread>
#include <unordered_set>
#include "arrow/compute/exec/bloom_filter.h"
#include "arrow/compute/exec/key_hash.h"
#include "arrow/compute/exec/task_util.h"
#include "arrow/compute/exec/test_util.h"
#include "arrow/compute/exec/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/cpu_info.h"

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec/hash_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
#include "arrow/compute/exec/hash_join.h"
#include "arrow/compute/exec/hash_join_dict.h"
#include "arrow/compute/exec/hash_join_node.h"
#include "arrow/compute/exec/key_hash.h"
#include "arrow/compute/exec/options.h"
#include "arrow/compute/exec/schema_util.h"
#include "arrow/compute/exec/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
Expand Down
172 changes: 172 additions & 0 deletions cpp/src/arrow/compute/exec/light_array_exec_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/light_array.h"

#include <gtest/gtest.h>

#include "arrow/compute/exec/test_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"

namespace arrow {
namespace compute {

// Checks that ColumnArraysFromExecBatch fills the output vector with one
// KeyColumnArray per column of a two-column (int64, boolean) batch, first
// for the whole batch and then for the (1, 1) overload.
TEST(KeyColumnArray, FromExecBatch) {
  ExecBatch source =
      ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]");
  std::vector<KeyColumnArray> columns;

  // Whole batch: both columns must report all three rows. The int64 column is
  // expected to report fixed_length 8 and the boolean column fixed_length 0.
  ASSERT_OK(ColumnArraysFromExecBatch(source, &columns));

  ASSERT_EQ(2, columns.size());
  ASSERT_EQ(8, columns[0].metadata().fixed_length);
  ASSERT_EQ(0, columns[1].metadata().fixed_length);
  ASSERT_EQ(3, columns[0].length());
  ASSERT_EQ(3, columns[1].length());

  // Sliced overload, reusing the same output vector.
  // NOTE(review): the two integer arguments are presumably (start_row, num_rows);
  // the assertions below only establish that each column ends up with length 1.
  ASSERT_OK(ColumnArraysFromExecBatch(source, 1, 1, &columns));

  ASSERT_EQ(2, columns.size());
  ASSERT_EQ(8, columns[0].metadata().fixed_length);
  ASSERT_EQ(0, columns[1].metadata().fixed_length);
  ASSERT_EQ(1, columns[0].length());
  ASSERT_EQ(1, columns[1].length());
}

// Appends every row of two (int64, boolean) batches through
// ExecBatchBuilder::AppendSelected and checks that Flush() yields their
// concatenation. Also checks that the dedicated pool reports outstanding
// bytes while the builder/result are alive and none once they are destroyed.
TEST(ExecBatchBuilder, AppendBatches) {
  std::unique_ptr<MemoryPool> pool_holder = MemoryPool::CreateDefault();
  MemoryPool* pool = pool_holder.get();
  ExecBatch first =
      ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]");
  ExecBatch second =
      ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]");
  ExecBatch expected = ExecBatchFromJSON(
      {int64(), boolean()},
      "[[1, true], [2, false], [null, null], [null, true], [5, true], [6, false]]");
  {
    ExecBatchBuilder builder;
    uint16_t all_rows[3] = {0, 1, 2};
    // Same selection is applied to each input, in order.
    const ExecBatch* inputs[] = {&first, &second};
    for (const ExecBatch* input : inputs) {
      ASSERT_OK(builder.AppendSelected(pool, *input, 3, all_rows, /*num_cols=*/2));
    }
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected, flushed);
    // The flushed batch is still in scope, so the pool must show allocations.
    ASSERT_NE(0, pool->bytes_allocated());
  }
  // Everything allocated inside the scope above must have been returned.
  ASSERT_EQ(0, pool->bytes_allocated());
}

// Appends only rows 0 and 1 of two (int64, boolean) batches and checks that
// Flush() yields exactly those four rows, in append order. Also checks that
// the dedicated pool is non-empty while the result is alive and empty after.
TEST(ExecBatchBuilder, AppendBatchesSomeRows) {
  std::unique_ptr<MemoryPool> pool_holder = MemoryPool::CreateDefault();
  MemoryPool* pool = pool_holder.get();
  ExecBatch first =
      ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]");
  ExecBatch second =
      ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]");
  ExecBatch expected = ExecBatchFromJSON(
      {int64(), boolean()}, "[[1, true], [2, false], [null, true], [5, true]]");
  {
    ExecBatchBuilder builder;
    // The third row of each input is deliberately left out.
    uint16_t leading_rows[2] = {0, 1};
    const ExecBatch* inputs[] = {&first, &second};
    for (const ExecBatch* input : inputs) {
      ASSERT_OK(builder.AppendSelected(pool, *input, 2, leading_rows, /*num_cols=*/2));
    }
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected, flushed);
    // The flushed batch is still in scope, so the pool must show allocations.
    ASSERT_NE(0, pool->bytes_allocated());
  }
  // Everything allocated inside the scope above must have been returned.
  ASSERT_EQ(0, pool->bytes_allocated());
}

// Exercises column selection in ExecBatchBuilder::AppendSelected on two
// (int64, boolean) batches in three independent sections:
//   1. explicit col_ids = {0}      -> only the int64 column is produced,
//   2. num_cols = 1 with no col_ids -> same result as section 1,
//   3. explicit col_ids = {1}      -> only the boolean column is produced.
// Each section checks the pool is non-empty while its result is alive; the
// final assertion checks that nothing remains allocated afterwards.
TEST(ExecBatchBuilder, AppendBatchesSomeCols) {
  std::unique_ptr<MemoryPool> pool_holder = MemoryPool::CreateDefault();
  MemoryPool* pool = pool_holder.get();
  ExecBatch first =
      ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]");
  ExecBatch second =
      ExecBatchFromJSON({int64(), boolean()}, "[[null, true], [5, true], [6, false]]");
  ExecBatch expected_int_col =
      ExecBatchFromJSON({int64()}, "[[1], [2], [null], [null], [5], [6]]");
  ExecBatch expected_bool_col = ExecBatchFromJSON(
      {boolean()}, "[[true], [false], [null], [true], [true], [false]]");
  const ExecBatch* inputs[] = {&first, &second};
  {
    // Section 1: select column 0 explicitly.
    ExecBatchBuilder builder;
    uint16_t all_rows[3] = {0, 1, 2};
    int leading_col[1] = {0};
    for (const ExecBatch* input : inputs) {
      ASSERT_OK(builder.AppendSelected(pool, *input, 3, all_rows, /*num_cols=*/1,
                                       leading_col));
    }
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected_int_col, flushed);
    ASSERT_NE(0, pool->bytes_allocated());
  }
  {
    // Section 2: omit col_ids; with num_cols == 1 the first column is implied.
    ExecBatchBuilder builder;
    uint16_t all_rows[3] = {0, 1, 2};
    for (const ExecBatch* input : inputs) {
      ASSERT_OK(builder.AppendSelected(pool, *input, 3, all_rows, /*num_cols=*/1));
    }
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected_int_col, flushed);
    ASSERT_NE(0, pool->bytes_allocated());
  }
  {
    // Section 3: select column 1 explicitly.
    ExecBatchBuilder builder;
    uint16_t all_rows[3] = {0, 1, 2};
    int trailing_col[1] = {1};
    for (const ExecBatch* input : inputs) {
      ASSERT_OK(builder.AppendSelected(pool, *input, 3, all_rows, /*num_cols=*/1,
                                       trailing_col));
    }
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected_bool_col, flushed);
    ASSERT_NE(0, pool->bytes_allocated());
  }
  // All three sections have gone out of scope; the pool must be empty again.
  ASSERT_EQ(0, pool->bytes_allocated());
}

// Checks ExecBatchBuilder::AppendNulls in two situations: after real rows
// have already been appended, and on a fresh builder. In both cases two
// all-null (int64, boolean) rows are expected at the end of the flushed
// batch. Pool accounting is checked inside each section and at the end.
TEST(ExecBatchBuilder, AppendNulls) {
  std::unique_ptr<MemoryPool> pool_holder = MemoryPool::CreateDefault();
  MemoryPool* pool = pool_holder.get();
  ExecBatch source =
      ExecBatchFromJSON({int64(), boolean()}, "[[1, true], [2, false], [null, null]]");
  ExecBatch expected_rows_then_nulls = ExecBatchFromJSON(
      {int64(), boolean()},
      "[[1, true], [2, false], [null, null], [null, null], [null, null]]");
  ExecBatch expected_only_nulls =
      ExecBatchFromJSON({int64(), boolean()}, "[[null, null], [null, null]]");
  {
    // Nulls appended after three selected rows.
    ExecBatchBuilder builder;
    uint16_t all_rows[3] = {0, 1, 2};
    ASSERT_OK(builder.AppendSelected(pool, source, 3, all_rows, /*num_cols=*/2));
    ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2));
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected_rows_then_nulls, flushed);
    ASSERT_NE(0, pool->bytes_allocated());
  }
  {
    // Nulls appended to a builder that holds nothing else.
    ExecBatchBuilder builder;
    ASSERT_OK(builder.AppendNulls(pool, {int64(), boolean()}, 2));
    ExecBatch flushed = builder.Flush();
    ASSERT_EQ(expected_only_nulls, flushed);
    ASSERT_NE(0, pool->bytes_allocated());
  }
  // Both sections have gone out of scope; the pool must be empty again.
  ASSERT_EQ(0, pool->bytes_allocated());
}

} // namespace compute
} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec/swiss_join.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
#include <mutex>
#include "arrow/array/util.h" // MakeArrayFromScalar
#include "arrow/compute/exec/hash_join.h"
#include "arrow/compute/exec/key_hash.h"
#include "arrow/compute/exec/swiss_join_internal.h"
#include "arrow/compute/exec/util.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/util/bit_util.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec/swiss_join_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#pragma once

#include <cstdint>
#include "arrow/compute/exec/key_map.h"
#include "arrow/compute/exec/options.h"
#include "arrow/compute/exec/partition_util.h"
#include "arrow/compute/exec/schema_util.h"
#include "arrow/compute/exec/task_util.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_map.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/row/encode_internal.h"

Expand Down
Loading