2 changes: 2 additions & 0 deletions cpp/src/arrow/compute/api_scalar.cc
@@ -42,6 +42,8 @@ namespace compute {
// Arithmetic

SCALAR_EAGER_BINARY(Add, "add")
+SCALAR_EAGER_BINARY(Subtract, "subtract")
+SCALAR_EAGER_BINARY(Multiply, "multiply")

// ----------------------------------------------------------------------
// Set-related operations
30 changes: 25 additions & 5 deletions cpp/src/arrow/compute/api_scalar.h
@@ -35,16 +35,36 @@ namespace compute {

// ----------------------------------------------------------------------

-/// \brief Add two values together. Array values must be the same length. If a
-/// value is null in either addend, the result is null
+/// \brief Add two values together. Array values must be the same length. If
+/// either addend is null the result will be null.
///
-/// \param[in] left the first value
-/// \param[in] right the second value
+/// \param[in] left the first addend
+/// \param[in] right the second addend
/// \param[in] ctx the function execution context, optional
-/// \return the elementwise addition of the values
+/// \return the elementwise sum
ARROW_EXPORT
Result<Datum> Add(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);

+/// \brief Subtract two values. Array values must be the same length. If the
+/// minuend or subtrahend is null the result will be null.
+///
+/// \param[in] left the value subtracted from (minuend)
+/// \param[in] right the value by which the minuend is reduced (subtrahend)
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise difference
+ARROW_EXPORT
+Result<Datum> Subtract(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
+
+/// \brief Multiply two values. Array values must be the same length. If either
+/// factor is null the result will be null.
+///
+/// \param[in] left the first factor
+/// \param[in] right the second factor
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise product
+ARROW_EXPORT
+Result<Datum> Multiply(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);

enum CompareOperator {
  EQUAL,
  NOT_EQUAL,
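For orientation, a usage sketch of the eager API declared above (illustrative only, not part of this diff; the helper name is invented):

#include "arrow/api.h"
#include "arrow/compute/api.h"

// Elementwise subtraction of two equal-length arrays; a null slot in either
// input yields a null slot in the output.
arrow::Result<std::shared_ptr<arrow::Array>> SubtractExample(
    const std::shared_ptr<arrow::Array>& left,
    const std::shared_ptr<arrow::Array>& right) {
  ARROW_ASSIGN_OR_RAISE(arrow::Datum diff, arrow::compute::Subtract(left, right));
  return diff.make_array();
}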
90 changes: 18 additions & 72 deletions cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -337,25 +337,6 @@ void ScalarPrimitiveExecUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
  }
}

-template <typename Op, typename OutType, typename Arg0Type, typename Arg1Type>
-void ScalarPrimitiveExecBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  using OUT = typename OutType::c_type;
-  using ARG0 = typename Arg0Type::c_type;
-  using ARG1 = typename Arg1Type::c_type;
-
-  if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
-    ctx->SetStatus(Status::NotImplemented("NYI"));
-  } else {
-    ArrayData* out_arr = out->mutable_array();
-    auto out_data = out_arr->GetMutableValues<OUT>(1);
-    auto arg0_data = batch[0].array()->GetValues<ARG0>(1);
-    auto arg1_data = batch[1].array()->GetValues<ARG1>(1);
-    for (int64_t i = 0; i < batch.length; ++i) {
-      *out_data++ = Op::template Call<OUT, ARG0, ARG1>(ctx, *arg0_data++, *arg1_data++);
-    }
-  }
-}
Review comment (Member):

If you're going to remove this, you absolutely must write benchmarks to show that the more general version is not slower.

Reply (Member, Author):

Writing a benchmark; the JIRA to track it is https://issues.apache.org/jira/browse/ARROW-9079.

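A benchmark along those lines would likely use Google Benchmark, which Arrow's C++ benchmarks build on. The sketch below is illustrative only (helper names are invented, and this is not the benchmark tracked by ARROW-9079):

#include <cstdint>
#include <cstdlib>
#include <memory>

#include "arrow/api.h"
#include "arrow/compute/api.h"
#include "benchmark/benchmark.h"

// Build a dense int64 array of the given length (no nulls).
static std::shared_ptr<arrow::Array> MakeInt64Array(int64_t length) {
  arrow::Int64Builder builder;
  if (!builder.Reserve(length).ok()) std::abort();
  for (int64_t i = 0; i < length; ++i) builder.UnsafeAppend(i);
  std::shared_ptr<arrow::Array> out;
  if (!builder.Finish(&out).ok()) std::abort();
  return out;
}

// Measure throughput of the array-array Add kernel.
static void BM_AddArrays(benchmark::State& state) {
  auto lhs = MakeInt64Array(state.range(0));
  auto rhs = MakeInt64Array(state.range(0));
  for (auto _ : state) {
    benchmark::DoNotOptimize(arrow::compute::Add(lhs, rhs));
  }
  state.SetItemsProcessed(state.iterations() * state.range(0));
}
BENCHMARK(BM_AddArrays)->Arg(1 << 20);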

// OutputAdapter allows passing an inlineable lambda that provides a sequence
// of output values to write into output memory. Boolean and primitive outputs
// are currently implemented, and the validity bitmap is presumed to be handled
@@ -610,63 +591,65 @@ struct ScalarUnaryNotNull {
//     // implementation
//   }
// };
-template <typename OutType, typename Arg0Type, typename Arg1Type, typename Op,
-          typename FlippedOp = Op>
+template <typename OutType, typename Arg0Type, typename Arg1Type, typename Op>
struct ScalarBinary {
  using OUT = typename GetOutputType<OutType>::T;
  using ARG0 = typename GetViewType<Arg0Type>::T;
  using ARG1 = typename GetViewType<Arg1Type>::T;

-  template <typename ChosenOp>
  static void ArrayArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    ArrayIterator<Arg0Type> arg0(*batch[0].array());
    ArrayIterator<Arg1Type> arg1(*batch[1].array());
    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OUT {
-      return ChosenOp::template Call(ctx, arg0(), arg1());
+      return Op::template Call(ctx, arg0(), arg1());
    });
  }

-  template <typename ChosenOp>
  static void ArrayScalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    ArrayIterator<Arg0Type> arg0(*batch[0].array());
    auto arg1 = UnboxScalar<Arg1Type>::Unbox(batch[1]);
    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OUT {
-      return ChosenOp::template Call(ctx, arg0(), arg1);
+      return Op::template Call(ctx, arg0(), arg1);
    });
  }

+  static void ScalarArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    auto arg0 = UnboxScalar<Arg0Type>::Unbox(batch[0]);
+    ArrayIterator<Arg1Type> arg1(*batch[1].array());
+    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OUT {
+      return Op::template Call(ctx, arg0, arg1());
+    });
+  }
+
-  template <typename ChosenOp>
  static void ScalarScalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    auto arg0 = UnboxScalar<Arg0Type>::Unbox(batch[0]);
    auto arg1 = UnboxScalar<Arg1Type>::Unbox(batch[1]);
-    out->value = BoxScalar<OutType>::Box(ChosenOp::template Call(ctx, arg0, arg1),
+    out->value = BoxScalar<OutType>::Box(Op::template Call(ctx, arg0, arg1),
                                         out->type());
  }

  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    if (batch[0].kind() == Datum::ARRAY) {
      if (batch[1].kind() == Datum::ARRAY) {
-        return ArrayArray<Op>(ctx, batch, out);
+        return ArrayArray(ctx, batch, out);
      } else {
-        return ArrayScalar<Op>(ctx, batch, out);
+        return ArrayScalar(ctx, batch, out);
      }
    } else {
      if (batch[1].kind() == Datum::ARRAY) {
-        // e.g. if we were doing scalar < array, we flip and do array >= scalar
-        return BinaryExecFlipped(ctx, ArrayScalar<FlippedOp>, batch, out);
+        return ScalarArray(ctx, batch, out);
      } else {
-        return ScalarScalar<Op>(ctx, batch, out);
+        return ScalarScalar(ctx, batch, out);
      }
    }
  }
};

// A kernel exec generator for binary kernels where both input types are the
// same
-template <typename OutType, typename ArgType, typename Op,
-          typename FlippedOp = Op>
-using ScalarBinaryEqualTypes = ScalarBinary<OutType, ArgType, ArgType, Op, FlippedOp>;
+template <typename OutType, typename ArgType, typename Op>
+using ScalarBinaryEqualTypes = ScalarBinary<OutType, ArgType, ArgType, Op>;
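For reference, a minimal functor that satisfies this contract could look as follows (BitwiseXor is a hypothetical example, not part of this PR; it matches the deduced single-type Call signature used by the call sites above):

struct BitwiseXor {
  template <typename T>
  static constexpr T Call(KernelContext*, T left, T right) {
    return left ^ right;
  }
};

// Instantiated for a concrete integer type as:
//   ArrayKernelExec exec = ScalarBinaryEqualTypes<Int32Type, Int32Type, BitwiseXor>::Exec;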

// ----------------------------------------------------------------------
// Dynamic kernel selectors. These functors allow a kernel implementation to be
@@ -726,43 +709,6 @@ ArrayKernelExec NumericEqualTypesUnary(detail::GetTypeId get_id) {
  }
}

-// Generate a kernel given a functor of type
-//
-// struct OPERATOR_NAME {
-//   template <typename OUT, typename ARG0, typename ARG1>
-//   static OUT Call(KernelContext*, ARG0 left, ARG1 right) {
-//     // IMPLEMENTATION
-//   }
-// };
-template <typename Op>
-ArrayKernelExec NumericEqualTypesBinary(detail::GetTypeId get_id) {
-  switch (get_id.id) {
-    case Type::INT8:
-      return ScalarPrimitiveExecBinary<Op, Int8Type, Int8Type, Int8Type>;
-    case Type::UINT8:
-      return ScalarPrimitiveExecBinary<Op, UInt8Type, UInt8Type, UInt8Type>;
-    case Type::INT16:
-      return ScalarPrimitiveExecBinary<Op, Int16Type, Int16Type, Int16Type>;
-    case Type::UINT16:
-      return ScalarPrimitiveExecBinary<Op, UInt16Type, UInt16Type, UInt16Type>;
-    case Type::INT32:
-      return ScalarPrimitiveExecBinary<Op, Int32Type, Int32Type, Int32Type>;
-    case Type::UINT32:
-      return ScalarPrimitiveExecBinary<Op, UInt32Type, UInt32Type, UInt32Type>;
-    case Type::INT64:
-      return ScalarPrimitiveExecBinary<Op, Int64Type, Int64Type, Int64Type>;
-    case Type::UINT64:
-      return ScalarPrimitiveExecBinary<Op, UInt64Type, UInt64Type, UInt64Type>;
-    case Type::FLOAT:
-      return ScalarPrimitiveExecBinary<Op, FloatType, FloatType, FloatType>;
-    case Type::DOUBLE:
-      return ScalarPrimitiveExecBinary<Op, DoubleType, DoubleType, DoubleType>;
-    default:
-      DCHECK(false);
-      return ExecFail;
-  }
-}

// Generate a kernel given a templated functor. This template effectively
// "curries" the first type argument. The functor must be of the form:
//
141 changes: 134 additions & 7 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -16,25 +16,150 @@
// under the License.

#include "arrow/compute/kernels/common.h"
#include "arrow/util/int_util.h"

namespace arrow {
namespace compute {

+template <typename T>
+using is_unsigned_integer = std::integral_constant<bool, std::is_integral<T>::value &&
+                                                             std::is_unsigned<T>::value>;
+
+template <typename T>
+using is_signed_integer =
+    std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
+
+template <typename T>
+using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, T>;
+
+template <typename T>
+using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, T>;
+
+template <typename T>
+using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
+
+template <typename T, typename Unsigned = typename std::make_unsigned<T>::type>
+constexpr Unsigned to_unsigned(T signed_) {
+  return static_cast<Unsigned>(signed_);
+}

struct Add {
-  template <typename OUT, typename ARG0, typename ARG1>
-  static constexpr OUT Call(KernelContext*, ARG0 left, ARG1 right) {
+  template <typename T>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
    return left + right;
  }
+
+  template <typename T>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+    return left + right;
+  }
+
+  template <typename T>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+    return to_unsigned(left) + to_unsigned(right);
+  }
};

+struct Subtract {
+  template <typename T>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
+    return left - right;
+  }
+
+  template <typename T>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+    return left - right;
+  }
+
+  template <typename T>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+    return to_unsigned(left) - to_unsigned(right);
+  }
+};
+
+struct Multiply {
+  static_assert(std::is_same<decltype(int8_t() * int8_t()), int32_t>::value, "");
+  static_assert(std::is_same<decltype(uint8_t() * uint8_t()), int32_t>::value, "");
+  static_assert(std::is_same<decltype(int16_t() * int16_t()), int32_t>::value, "");
+  static_assert(std::is_same<decltype(uint16_t() * uint16_t()), int32_t>::value, "");
+  static_assert(std::is_same<decltype(int32_t() * int32_t()), int32_t>::value, "");
+
+  static_assert(std::is_same<decltype(uint32_t() * uint32_t()), uint32_t>::value, "");
+
+  static_assert(std::is_same<decltype(int64_t() * int64_t()), int64_t>::value, "");
+  static_assert(std::is_same<decltype(uint64_t() * uint64_t()), uint64_t>::value, "");
+
+  template <typename T>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
+    return left * right;
+  }
+
+  template <typename T>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+    return left * right;
+  }
+
+  template <typename T>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+    return to_unsigned(left) * to_unsigned(right);
+  }
+
+  // Multiplication of 16-bit integer types implicitly promotes to signed 32-bit
+  // integer. However, some inputs may nevertheless overflow (which triggers undefined
+  // behaviour). Therefore we first cast to 32-bit unsigned integers where overflow is
+  // well defined.
+  template <typename T = void>
+  static constexpr int16_t Call(KernelContext*, int16_t left, int16_t right) {
+    return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
+  }
+  template <typename T = void>
+  static constexpr uint16_t Call(KernelContext*, uint16_t left, uint16_t right) {
+    return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
+  }
+};
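As a standalone illustration of the wraparound technique used by the signed overloads above (assumes C++11; the cast from unsigned back to signed is implementation-defined before C++20 but yields the two's-complement result on the platforms Arrow targets):

#include <cstdint>
#include <type_traits>

// Unsigned arithmetic wraps modulo 2^N, so routing signed operands through the
// corresponding unsigned type sidesteps signed-overflow undefined behaviour.
template <typename T, typename U = typename std::make_unsigned<T>::type>
constexpr T wrapping_add(T left, T right) {
  return static_cast<T>(static_cast<U>(left) + static_cast<U>(right));
}

static_assert(wrapping_add<int32_t>(2147483647, 1) == -2147483647 - 1,
              "INT32_MAX + 1 wraps to INT32_MIN");

For 8- and 16-bit operands the unsigned values would first promote to int, which is exactly why Multiply above adds the explicit 16-bit overloads that route through uint32_t.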

namespace codegen {

+// Generate a kernel given an arithmetic functor.
+//
+// To avoid undefined behaviour from signed integer overflow, the functors treat
+// signed input argument values as unsigned, then cast the result back to the
+// signed type, making it wrap around.
+template <typename Op>
+ArrayKernelExec NumericEqualTypesBinary(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::INT8:
+      return ScalarBinaryEqualTypes<Int8Type, Int8Type, Op>::Exec;
+    case Type::UINT8:
+      return ScalarBinaryEqualTypes<UInt8Type, UInt8Type, Op>::Exec;
+    case Type::INT16:
+      return ScalarBinaryEqualTypes<Int16Type, Int16Type, Op>::Exec;
+    case Type::UINT16:
+      return ScalarBinaryEqualTypes<UInt16Type, UInt16Type, Op>::Exec;
+    case Type::INT32:
+      return ScalarBinaryEqualTypes<Int32Type, Int32Type, Op>::Exec;
+    case Type::UINT32:
+      return ScalarBinaryEqualTypes<UInt32Type, UInt32Type, Op>::Exec;
+    case Type::INT64:
+      return ScalarBinaryEqualTypes<Int64Type, Int64Type, Op>::Exec;
+    case Type::UINT64:
+      return ScalarBinaryEqualTypes<UInt64Type, UInt64Type, Op>::Exec;
+    case Type::FLOAT:
+      return ScalarBinaryEqualTypes<FloatType, FloatType, Op>::Exec;
+    case Type::DOUBLE:
+      return ScalarBinaryEqualTypes<DoubleType, DoubleType, Op>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}

template <typename Op>
-void MakeBinaryFunction(std::string name, FunctionRegistry* registry) {
+void AddBinaryFunction(std::string name, FunctionRegistry* registry) {
  auto func = std::make_shared<ScalarFunction>(name, Arity::Binary());
-  for (const std::shared_ptr<DataType>& ty : NumericTypes()) {
-    DCHECK_OK(func->AddKernel({InputType::Array(ty), InputType::Array(ty)}, ty,
-                              NumericEqualTypesBinary<Op>(*ty)));
+  for (const auto& ty : NumericTypes()) {
+    auto exec = codegen::NumericEqualTypesBinary<Op>(ty);
+    DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
  }
  DCHECK_OK(registry->AddFunction(std::move(func)));
}
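Once registered, the kernels are also reachable by name through generic dispatch; a quick sanity check could look like this (illustrative, not part of the diff; the wrapper name is invented):

#include "arrow/compute/api.h"

// Dispatch through the registry by function name rather than the typed wrappers.
arrow::Result<arrow::Datum> MultiplyByName(const arrow::Datum& left,
                                           const arrow::Datum& right) {
  return arrow::compute::CallFunction("multiply", {left, right});
}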
@@ -44,7 +169,9 @@ void MakeBinaryFunction(std::string name, FunctionRegistry* registry) {
namespace internal {

void RegisterScalarArithmetic(FunctionRegistry* registry) {
-  codegen::MakeBinaryFunction<Add>("add", registry);
+  codegen::AddBinaryFunction<Add>("add", registry);
+  codegen::AddBinaryFunction<Subtract>("subtract", registry);
+  codegen::AddBinaryFunction<Multiply>("multiply", registry);
}

} // namespace internal