From 7c5bdf714465bd93b1139c75a5bfe11198d3352d Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Tue, 31 Aug 2021 10:13:24 +0000 Subject: [PATCH 1/2] ARROW-13067: [C++][Compute] Implement integer to decimal cast --- .../compute/kernels/scalar_arithmetic_test.cc | 14 ++++- .../compute/kernels/scalar_cast_numeric.cc | 55 +++++++++++++++++++ .../arrow/compute/kernels/scalar_cast_test.cc | 34 +++++++++++- 3 files changed, 100 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 2939e47666e..ce3588fb432 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -1365,6 +1365,18 @@ TEST(TestBinaryDecimalArithmetic, DispatchBest) { } } + // decimal, integer + for (std::string name : {"add", "subtract", "multiply", "divide"}) { + for (std::string suffix : {"", "_checked"}) { + name += suffix; + + CheckDispatchBest(name, {int64(), decimal128(1, 0)}, + {decimal128(1, 0), decimal128(1, 0)}); + CheckDispatchBest(name, {decimal128(1, 0), int64()}, + {decimal128(1, 0), decimal128(1, 0)}); + } + } + // decimal, decimal for (std::string name : {"add", "subtract"}) { for (std::string suffix : {"", "_checked"}) { @@ -1410,8 +1422,6 @@ TEST(TestBinaryDecimalArithmetic, DispatchBest) { {decimal256(6, 4), decimal256(6, 4)}); } } - - // TODO(ARROW-13067): add 'integer, decimal' tests } // reference result from bc (precsion=100, scale=40) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index cd89a57ed77..dcc03be1c95 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -391,6 +391,49 @@ struct CastFunctor + OutValue Call(KernelContext*, IntegerType val, Status* st) const { + auto maybe_decimal = OutValue(val).Rescale(0, out_scale_); + if (ARROW_PREDICT_TRUE(maybe_decimal.ok())) { + return maybe_decimal.MoveValueUnsafe(); + } + *st = maybe_decimal.status(); + return OutValue{}; + } + + int32_t out_scale_; +}; + +template +struct CastFunctor::value && is_integer_type::value>> { + static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + const auto& out_type = checked_cast(*out->type()); + const auto out_scale = out_type.scale(); + const auto out_precision = out_type.precision(); + + // verify precision and scale + if (out_scale < 0) { + return Status::Invalid("Scale must be non-negative"); + } + // maximal number of decimal digits for int8/16/32/64 + constexpr std::array decimal_digits{3, 5, 10, 19}; + using ctype = typename I::c_type; + static_assert(sizeof(ctype) <= 8, ""); + if (out_precision < decimal_digits[BitUtil::Log2(sizeof(ctype))] + out_scale) { + return Status::Invalid("Invalid output precision and scale"); + } + + applicator::ScalarUnaryNotNullStateful kernel( + IntegerToDecimal{out_scale}); + return kernel.Exec(ctx, batch, out); + } +}; + // ---------------------------------------------------------------------- // Decimal to decimal @@ -641,6 +684,12 @@ std::shared_ptr GetCastToDecimal128() { DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty, CastFunctor::Exec)); + // Cast from integer + for (const std::shared_ptr& in_ty : IntTypes()) { + auto exec = GenerateInteger(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + // Cast from other decimal auto exec = CastFunctor::Exec; // We resolve the output type of this kernel from the CastOptions @@ -664,6 +713,12 @@ std::shared_ptr GetCastToDecimal256() { DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty, CastFunctor::Exec)); + // Cast from integer + for (const std::shared_ptr& in_ty : IntTypes()) { + auto exec = GenerateInteger(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + // Cast from other decimal auto exec = CastFunctor::Exec; DCHECK_OK( diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 90d41894578..fc7e42aca6f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -71,9 +71,11 @@ static std::vector> kNumericTypes = { uint8(), int8(), uint16(), int16(), uint32(), int32(), uint64(), int64(), float32(), float64()}; -static std::vector> kDictionaryIndexTypes = { +static std::vector> kIntegerTypes = { int8(), uint8(), int16(), uint16(), int32(), uint32(), int64(), uint64()}; +static std::vector> kDictionaryIndexTypes = kIntegerTypes; + static std::vector> kBaseBinaryTypes = { binary(), utf8(), large_binary(), large_utf8()}; @@ -587,6 +589,36 @@ TEST(Cast, Decimal256ToInt) { CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options); } +TEST(Cast, IntegerToDecimal) { + for (auto decimal_type : {decimal128(21, 2), decimal256(21, 2)}) { + for (auto integer_type : kIntegerTypes) { + CheckCast( + ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"), + ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00", "99.00"])")); + } + } + + // extreme value + for (auto decimal_type : {decimal128(19, 0), decimal256(19, 0)}) { + CheckCast(ArrayFromJSON(int64(), "[-9223372036854775808, 9223372036854775807]"), + ArrayFromJSON(decimal_type, + R"(["-9223372036854775808", "9223372036854775807"])")); + CheckCast(ArrayFromJSON(uint64(), "[0, 18446744073709551615]"), + ArrayFromJSON(decimal_type, R"(["0", "18446744073709551615"])")); + } + + // insufficient output precision + { + CastOptions options; + + options.to_type = decimal128(5, 3); + CheckCastFails(ArrayFromJSON(int8(), "[0]"), options); + + options.to_type = decimal256(76, 67); + CheckCastFails(ArrayFromJSON(int32(), "[0]"), options); + } +} + TEST(Cast, Decimal128ToDecimal128) { CastOptions options; From 186d356fc38b57e54119edfe5fbe6dbda825c75c Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Thu, 2 Sep 2021 04:50:56 +0000 Subject: [PATCH 2/2] refine error message of insufficient precision --- cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index dcc03be1c95..e9cf9284ceb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -424,8 +424,12 @@ struct CastFunctor decimal_digits{3, 5, 10, 19}; using ctype = typename I::c_type; static_assert(sizeof(ctype) <= 8, ""); - if (out_precision < decimal_digits[BitUtil::Log2(sizeof(ctype))] + out_scale) { - return Status::Invalid("Invalid output precision and scale"); + const int precision = decimal_digits[BitUtil::Log2(sizeof(ctype))] + out_scale; + if (out_precision < precision) { + return Status::Invalid( + "Precision is not great enough for the result. " + "It should be at least ", + precision); } applicator::ScalarUnaryNotNullStateful kernel(