Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion cpp/src/arrow/compute/kernels/aggregate_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,26 @@ struct SumState {
ThisType local;
const auto values = array.raw_values();
const int64_t length = array.length();
for (int64_t i = 0; i < length; i++) {

constexpr int64_t kRoundFactor = 8;
const int64_t length_rounded = BitUtil::RoundDown(length, kRoundFactor);
typename SumType::c_type sum_rounded[kRoundFactor] = {0};

// Unroll the loop so the partial sums can be accumulated in parallel
for (int64_t i = 0; i < length_rounded; i += kRoundFactor) {
for (int64_t k = 0; k < kRoundFactor; k++) {
sum_rounded[k] += values[i + k];
}
}
for (int64_t k = 0; k < kRoundFactor; k++) {
local.sum += sum_rounded[k];
}

// The trailing part
for (int64_t i = length_rounded; i < length; ++i) {
local.sum += values[i];
}

local.count = length;
return local;
}
Expand Down
21 changes: 16 additions & 5 deletions cpp/src/arrow/compute/kernels/aggregate_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,23 +305,34 @@ BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll<int64_t>)
->Apply(BenchmarkSetArgs);
#endif // ARROW_WITH_BENCHMARKS_REFERENCE

// Benchmarks Sum() over a randomly generated numeric array of ArrowType.
//
// state.range(0) is divided by the element size to obtain the number of
// elements, and state.range(1) is divided by 100 to obtain the null fraction
// passed to the generator. The generator is seeded with a fixed value (1923).
template <typename ArrowType>
static void SumKernel(benchmark::State& state) {
  using CType = typename TypeTraits<ArrowType>::CType;

  // Size the array by the element width of the type under test rather than a
  // hard-coded sizeof(int64_t), so every instantiation covers the same number
  // of bytes.
  const int64_t array_size = state.range(0) / sizeof(CType);
  const double null_percent = static_cast<double>(state.range(1)) / 100.0;
  auto rand = random::RandomArrayGenerator(1923);
  // NOTE(review): -100 and 100 are presumably the min/max value bounds —
  // confirm against RandomArrayGenerator::Numeric.
  auto array = rand.Numeric<ArrowType>(array_size, -100, 100, null_percent);

  for (auto _ : state) {
    ABORT_NOT_OK(Sum(array).status());
  }

  state.counters["size"] = static_cast<double>(state.range(0));
  state.counters["null_percent"] = static_cast<double>(state.range(1));
  // Report throughput with the actual element width of this instantiation.
  state.SetBytesProcessed(state.iterations() * array_size * sizeof(CType));
}

// SumKernel is a function template, so it cannot be handed to BENCHMARK()
// without template arguments. This macro defines a non-template wrapper named
// FuncName that forwards to SumKernel<Type>, and registers that wrapper with
// RegressionSetArgs.
#define SUM_KERNEL_BENCHMARK(FuncName, Type)                                \
  static void FuncName(benchmark::State& state) { SumKernel<Type>(state); } \
  BENCHMARK(FuncName)->Apply(RegressionSetArgs)

SUM_KERNEL_BENCHMARK(SumKernelFloat, FloatType);
SUM_KERNEL_BENCHMARK(SumKernelDouble, DoubleType);
SUM_KERNEL_BENCHMARK(SumKernelInt8, Int8Type);
SUM_KERNEL_BENCHMARK(SumKernelInt16, Int16Type);
SUM_KERNEL_BENCHMARK(SumKernelInt32, Int32Type);
SUM_KERNEL_BENCHMARK(SumKernelInt64, Int64Type);

} // namespace compute
} // namespace arrow