Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/apidoc/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,8 @@ EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = *-test.cc \
*test* \
*_generated.h \
*-benchmark.cc
*-benchmark.cc \
*internal*

# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_arrow_compute_test(scalar_test
test_util.cc)

add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/aggregate_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@

#include "arrow/builder.h"
#include "arrow/compute/api.h"
#include "arrow/compute/benchmark_util.h"
#include "arrow/memory_pool.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/util/benchmark_util.h"
#include "arrow/util/bit_util.h"

namespace arrow {
Expand Down
24 changes: 10 additions & 14 deletions cpp/src/arrow/compute/kernels/codegen_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,27 +215,23 @@ template <typename Type>
struct BoxScalar<Type, enable_if_has_c_type<Type>> {
using T = typename GetOutputType<Type>::T;
using ScalarType = typename TypeTraits<Type>::ScalarType;
static std::shared_ptr<Scalar> Box(T val, const std::shared_ptr<DataType>& type) {
return std::make_shared<ScalarType>(val, type);
}
static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
};

template <typename Type>
struct BoxScalar<Type, enable_if_base_binary<Type>> {
using T = typename GetOutputType<Type>::T;
using ScalarType = typename TypeTraits<Type>::ScalarType;
static std::shared_ptr<Scalar> Box(T val, const std::shared_ptr<DataType>&) {
return std::make_shared<ScalarType>(val);
static void Box(T val, Scalar* out) {
checked_cast<ScalarType*>(out)->value = std::make_shared<Buffer>(val);
}
};

template <>
struct BoxScalar<Decimal128Type> {
using T = Decimal128;
using ScalarType = Decimal128Scalar;
static std::shared_ptr<Scalar> Box(T val, const std::shared_ptr<DataType>& type) {
return std::make_shared<ScalarType>(val, type);
}
static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
Comment on lines -218 to +234
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The prior implementation was causing a lot of compiled code to be generated for some reason, FYI

};

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -396,8 +392,8 @@ struct ScalarUnary {
static void Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
if (arg0.is_valid) {
ARG0 arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
out->value = BoxScalar<OutType>::Box(Op::template Call<OUT, ARG0>(ctx, arg0_val),
out->type());
BoxScalar<OutType>::Box(Op::template Call<OUT, ARG0>(ctx, arg0_val),
out->scalar().get());
} else {
out->value = MakeNullScalar(arg0.type);
}
Expand Down Expand Up @@ -533,8 +529,8 @@ struct ScalarUnaryNotNullStateful {
void Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
if (arg0.is_valid) {
ARG0 arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
out->value = BoxScalar<OutType>::Box(
this->op.template Call<OUT, ARG0>(ctx, arg0_val), out->type());
BoxScalar<OutType>::Box(this->op.template Call<OUT, ARG0>(ctx, arg0_val),
out->scalar().get());
} else {
out->value = MakeNullScalar(arg0.type);
}
Expand Down Expand Up @@ -615,8 +611,8 @@ struct ScalarBinary {
if (out->scalar()->is_valid) {
auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
out->value = BoxScalar<OutType>::Box(Op::template Call(ctx, arg0_val, arg1_val),
out->type());
BoxScalar<OutType>::Box(Op::template Call(ctx, arg0_val, arg1_val),
out->scalar().get());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
#include <vector>

#include "arrow/compute/api_scalar.h"
#include "arrow/compute/benchmark_util.h"
#include "arrow/compute/kernels/test_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/util/benchmark_util.h"

namespace arrow {
namespace compute {
Expand Down
117 changes: 117 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

#include <cstdint>
#include <limits>
#include <memory>
#include <vector>

#include "arrow/compute/cast.h"
#include "arrow/compute/kernels/test_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/util/benchmark_util.h"

namespace arrow {
namespace compute {

// Fixed seed handed to random::RandomArrayGenerator by the benchmark helpers
// in this file.
constexpr auto kSeed = 0x94378165U;

// Times Cast() from a randomly generated numeric array of InputType to
// `to_type`.
//
// state:   benchmark state; GenericItemsArgs reads the array length
//          (`size`) and `null_proportion` from it.
// to_type: target type of the cast. Borrowed by const reference, since this
//          helper only forwards it and calling the helper should not copy
//          the shared_ptr.
// options: cast options forwarded unchanged to Cast().
// min/max: bounds handed to RandomArrayGenerator::Numeric for the generated
//          values.
//
// Input generation happens once, outside the timed loop; only the Cast() call
// is measured. The status of every Cast() is checked with ABORT_NOT_OK.
template <typename InputType, typename CType = typename InputType::c_type>
static void BenchmarkNumericCast(benchmark::State& state,
                                 const std::shared_ptr<DataType>& to_type,
                                 const CastOptions& options, CType min, CType max) {
  GenericItemsArgs args(state);
  random::RandomArrayGenerator rand(kSeed);
  auto input = rand.Numeric<InputType>(args.size, min, max, args.null_proportion);
  for (auto _ : state) {
    ABORT_NOT_OK(Cast(input, to_type, options).status());
  }
}

// Times Cast() from `from_type` to `to_type`, where the input is obtained by
// first generating random values of InputType and converting them to
// `from_type`.
//
// state:     benchmark state; GenericItemsArgs reads the array length
//            (`size`) and `null_proportion` from it.
// from_type: type the generated InputType array is converted to before
//            timing starts; this converted array is the benchmark input.
// to_type:   target type of the timed cast.
// options:   cast options forwarded to the timed Cast() only; the preparatory
//            conversion uses Cast()'s default options.
// min/max:   bounds handed to RandomArrayGenerator::Numeric for the generated
//            InputType values.
//
// Both type arguments are borrowed by const reference, since this helper only
// forwards them and calling it should not copy the shared_ptrs.
template <typename InputType, typename CType = typename InputType::c_type>
static void BenchmarkFloatingToIntegerCast(benchmark::State& state,
                                           const std::shared_ptr<DataType>& from_type,
                                           const std::shared_ptr<DataType>& to_type,
                                           const CastOptions& options, CType min,
                                           CType max) {
  GenericItemsArgs args(state);
  random::RandomArrayGenerator rand(kSeed);
  auto generated = rand.Numeric<InputType>(args.size, min, max, args.null_proportion);

  // One-off conversion performed outside the timed loop. Dereferencing the
  // Result assumes the conversion succeeded.
  std::shared_ptr<Array> values_as_float = *Cast(*generated, from_type);

  for (auto _ : state) {
    ABORT_NOT_OK(Cast(values_as_float, to_type, options).status());
  }
}

// Data sizes every cast benchmark is run with (first benchmark argument).
// NOTE(review): kL2Size is defined outside this file, presumably in
// arrow/util/benchmark_util.h -- confirm.
std::vector<int64_t> g_data_sizes = {kL2Size};

// Registers one benchmark run per (data size, null argument) combination.
// The second argument is consumed by GenericItemsArgs; how it maps to a null
// proportion is defined outside this file.
void CastSetArgs(benchmark::internal::Benchmark* bench) {
  const std::vector<ArgsType> null_args({1000, 10, 2, 1, 0});
  for (const int64_t data_size : g_data_sizes) {
    const auto size_arg = static_cast<ArgsType>(data_size);
    for (const auto null_arg : null_args) {
      bench->Args({size_arg, null_arg});
    }
  }
}

// Smallest and largest values representable as int32_t; used as value bounds
// for the benchmarks below that cast into int32.
static constexpr int32_t kInt32Min{std::numeric_limits<int32_t>::min()};
static constexpr int32_t kInt32Max{std::numeric_limits<int32_t>::max()};

// int64 -> int32 cast with CastOptions::Safe(); generated values are bounded
// by kInt32Min and kInt32Max.
static void CastInt64ToInt32Safe(benchmark::State& state) {
  const auto cast_options = CastOptions::Safe();
  BenchmarkNumericCast<Int64Type>(state, /*to_type=*/int32(), cast_options,
                                  /*min=*/kInt32Min, /*max=*/kInt32Max);
}

// int64 -> int32 cast with CastOptions::Unsafe(); generated values are
// bounded by kInt32Min and kInt32Max.
static void CastInt64ToInt32Unsafe(benchmark::State& state) {
  const auto cast_options = CastOptions::Unsafe();
  BenchmarkNumericCast<Int64Type>(state, /*to_type=*/int32(), cast_options,
                                  /*min=*/kInt32Min, /*max=*/kInt32Max);
}

// uint32 -> int32 cast with CastOptions::Safe(); generated values are bounded
// by 0 and kInt32Max.
static void CastUInt32ToInt32Safe(benchmark::State& state) {
  const auto cast_options = CastOptions::Safe();
  BenchmarkNumericCast<UInt32Type>(state, /*to_type=*/int32(), cast_options,
                                   /*min=*/0, /*max=*/kInt32Max);
}

// int64 -> float64 cast with CastOptions::Safe(); generated values are
// bounded by 0 and 1000.
static void CastInt64ToDoubleSafe(benchmark::State& state) {
  const auto cast_options = CastOptions::Safe();
  BenchmarkNumericCast<Int64Type>(state, /*to_type=*/float64(), cast_options,
                                  /*min=*/0, /*max=*/1000);
}

// int64 -> float64 cast with CastOptions::Unsafe(); generated values are
// bounded by 0 and 1000.
static void CastInt64ToDoubleUnsafe(benchmark::State& state) {
  const auto cast_options = CastOptions::Unsafe();
  BenchmarkNumericCast<Int64Type>(state, /*to_type=*/float64(), cast_options,
                                  /*min=*/0, /*max=*/1000);
}

// float64 -> int32 cast with CastOptions::Safe(). The input starts out as
// Int32Type values bounded by -1000 and 1000, which the helper converts to
// float64 before the timed loop.
static void CastDoubleToInt32Safe(benchmark::State& state) {
  const auto cast_options = CastOptions::Safe();
  BenchmarkFloatingToIntegerCast<Int32Type>(state, /*from_type=*/float64(),
                                            /*to_type=*/int32(), cast_options,
                                            /*min=*/-1000, /*max=*/1000);
}

// float64 -> int32 cast with CastOptions::Unsafe(). The input starts out as
// Int32Type values bounded by -1000 and 1000, which the helper converts to
// float64 before the timed loop.
static void CastDoubleToInt32Unsafe(benchmark::State& state) {
  const auto cast_options = CastOptions::Unsafe();
  BenchmarkFloatingToIntegerCast<Int32Type>(state, /*from_type=*/float64(),
                                            /*to_type=*/int32(), cast_options,
                                            /*min=*/-1000, /*max=*/1000);
}

// Integer -> int32 casts. CastSetArgs supplies the argument combinations
// each benchmark is run with.
BENCHMARK(CastInt64ToInt32Safe)->Apply(CastSetArgs);
BENCHMARK(CastInt64ToInt32Unsafe)->Apply(CastSetArgs);
BENCHMARK(CastUInt32ToInt32Safe)->Apply(CastSetArgs);

// Casts between integer and double, in both directions.
BENCHMARK(CastInt64ToDoubleSafe)->Apply(CastSetArgs);
BENCHMARK(CastInt64ToDoubleUnsafe)->Apply(CastSetArgs);
BENCHMARK(CastDoubleToInt32Safe)->Apply(CastSetArgs);
BENCHMARK(CastDoubleToInt32Unsafe)->Apply(CastSetArgs);

} // namespace compute
} // namespace arrow
Loading