From a6bb1baa1c380cf8129a9d5513053fbb1f4a5ef5 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 1 Jul 2020 20:34:15 +0200 Subject: [PATCH 1/5] ARROW-7011: [C++] Implement casts from float/double to decimal Also naturally available in Python using the Array.cast() method. --- .../compute/kernels/scalar_cast_internal.h | 4 + .../compute/kernels/scalar_cast_numeric.cc | 52 +++++- .../arrow/compute/kernels/scalar_cast_test.cc | 88 ++++++++-- cpp/src/arrow/util/decimal.cc | 134 ++++++++++++--- cpp/src/arrow/util/decimal.h | 5 + cpp/src/arrow/util/decimal_test.cc | 158 ++++++++++++++++++ 6 files changed, 402 insertions(+), 39 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.h b/cpp/src/arrow/compute/kernels/scalar_cast_internal.h index 59cff565255..914b670ca47 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.h +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.h @@ -75,6 +75,10 @@ Result ResolveOutputFromOptions(KernelContext* ctx, ARROW_EXPORT extern OutputType kOutputTargetType; +// Add generic casts to out_ty from: +// - the null type +// - dictionary with out_ty as given value type +// - extension types with a compatible storage type void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* func); } // namespace internal diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index f93bb357588..b056ff2586b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -467,6 +467,50 @@ struct CastFunctor { } }; +// ---------------------------------------------------------------------- +// Real to decimal + +template +struct SafeRealToDecimal { + template + Decimal128 Call(KernelContext* ctx, RealType val) const { + auto result = Decimal128::FromReal(val, out_precision_, out_scale_); + if (ARROW_PREDICT_FALSE(!result.ok())) { + if (!AllowTruncate) { + ctx->SetStatus(result.status()); + } + return Decimal128(); // Zero + } else { + return *std::move(result); + } + } + + int32_t out_scale_, out_precision_; +}; + +template +struct CastFunctor> { + static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + const auto& options = checked_cast(ctx->state())->options; + ArrayData* output = out->mutable_array(); + const auto& out_type_inst = checked_cast(*output->type); + const auto out_scale = out_type_inst.scale(); + const auto out_precision = out_type_inst.precision(); + + if (options.allow_decimal_truncate) { + using ExecutorType = SafeRealToDecimal; + applicator::ScalarUnaryNotNullStateful kernel( + ExecutorType{out_scale, out_precision}); + return kernel.Exec(ctx, batch, out); + } else { + using ExecutorType = SafeRealToDecimal; + applicator::ScalarUnaryNotNullStateful kernel( + ExecutorType{out_scale, out_precision}); + return kernel.Exec(ctx, batch, out); + } + } +}; + namespace { template @@ -530,10 +574,16 @@ std::shared_ptr GetCastToFloating(std::string name) { std::shared_ptr GetCastToDecimal() { OutputType sig_out_ty(ResolveOutputFromOptions); - // Cast to decimal auto func = std::make_shared("cast_decimal", Type::DECIMAL); AddCommonCasts(Type::DECIMAL, sig_out_ty, func.get()); + // Cast from floating point + DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty, + CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty, + CastFunctor::Exec)); + + // Cast from other decimal auto exec = CastFunctor::Exec; // We resolve the output type of this kernel from the CastOptions DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType::Array(Type::DECIMAL)}, sig_out_ty, diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a479fe903f0..252e50ee231 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -101,20 +101,11 @@ class TestCast : public TestBase { } } - template ::type> - void CheckFails(const std::shared_ptr& in_type, - const std::vector& in_values, const std::vector& is_valid, - const std::shared_ptr& out_type, const CastOptions& options, - bool check_scalar = true) { - std::shared_ptr input; - if (is_valid.size() > 0) { - ArrayFromVector(in_type, is_valid, in_values, &input); - } else { - ArrayFromVector(in_type, in_values, &input); - } - ASSERT_RAISES(Invalid, Cast(*input, out_type, options)); + void CheckFails(const Array& input, const std::shared_ptr& out_type, + const CastOptions& options, bool check_scalar = true) { + ASSERT_RAISES(Invalid, Cast(input, out_type, options)); - if (in_type->id() == Type::DECIMAL || out_type->id() == Type::DECIMAL) { + if (input.type_id() == Type::DECIMAL || out_type->id() == Type::DECIMAL) { // ARROW-9194 check_scalar = false; } @@ -124,14 +115,28 @@ class TestCast : public TestBase { // cases we will want to check more precisely if (check_scalar) { int64_t num_failing = 0; - for (int64_t i = 0; i < input->length(); ++i) { - auto maybe_out = Cast(*input->GetScalar(i), out_type, options); + for (int64_t i = 0; i < input.length(); ++i) { + auto maybe_out = Cast(*input.GetScalar(i), out_type, options); num_failing += static_cast(maybe_out.status().IsInvalid()); } ASSERT_GT(num_failing, 0); } } + template ::type> + void CheckFails(const std::shared_ptr& in_type, + const std::vector& in_values, const std::vector& is_valid, + const std::shared_ptr& out_type, const CastOptions& options, + bool check_scalar = true) { + std::shared_ptr input; + if (is_valid.size() > 0) { + ArrayFromVector(in_type, is_valid, in_values, &input); + } else { + ArrayFromVector(in_type, in_values, &input); + } + CheckFails(*input, out_type, options, check_scalar); + } + template ::type> void CheckFails(const std::vector& in_values, const std::vector& is_valid, const std::shared_ptr& out_type, const CastOptions& options, @@ -202,6 +207,14 @@ class TestCast : public TestBase { } } + void CheckFailsJSON(const std::shared_ptr& in_type, + const std::shared_ptr& out_type, + const std::string& in_json, bool check_scalar = true, + const CastOptions& options = CastOptions()) { + std::shared_ptr input = ArrayFromJSON(in_type, in_json); + CheckFails(*input, out_type, options, check_scalar); + } + template void TestCastBinaryToString() { CastOptions options; @@ -369,6 +382,23 @@ class TestCast : public TestBase { // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc } + + void TestCastFloatingToDecimal(const std::shared_ptr& in_type) { + auto out_type = decimal(5, 2); + + CheckCaseJSON(in_type, out_type, "[0.0, null, 123.45, 123.456, 999.994]", + R"(["0.00", null, "123.45", "123.46", "999.99"])"); + + // Overflow + CastOptions options{}; + out_type = decimal(5, 2); + CheckFailsJSON(in_type, out_type, "[999.996]", /*check_scalar=*/true, options); + + options.allow_decimal_truncate = true; + CheckCaseJSON(in_type, out_type, "[0.0, null, 999.996, 123.45, 999.994]", + R"(["0.00", null, "0.00", "123.45", "999.99"])", /*check_scalar=*/true, + options); + } }; TEST_F(TestCast, SameTypeZeroCopy) { @@ -901,6 +931,34 @@ TEST_F(TestCast, DecimalToDecimal) { check_truncate(decimal(4, 2), v5, is_valid1, decimal(2, 1), e5); } +TEST_F(TestCast, FloatToDecimal) { + auto in_type = float32(); + + TestCastFloatingToDecimal(in_type); + + // 2**64 + 2**41 (exactly representable as a float) + auto out_type = decimal(20, 0); + CheckCaseJSON(in_type, out_type, "[1.8446746e+19, -1.8446746e+19]", + R"(["18446746272732807168", "-18446746272732807168"])"); + out_type = decimal(20, 4); + CheckCaseJSON(in_type, out_type, "[1.8446746e+15, -1.8446746e+15]", + R"(["1844674627273280.7168", "-1844674627273280.7168"])"); +} + +TEST_F(TestCast, DoubleToDecimal) { + auto in_type = float64(); + + TestCastFloatingToDecimal(in_type); + + // 2**64 + 2**11 (exactly representable as a double) + auto out_type = decimal(20, 0); + CheckCaseJSON(in_type, out_type, "[1.8446744073709556e+19, -1.8446744073709556e+19]", + R"(["18446744073709555712", "-18446744073709555712"])"); + out_type = decimal(20, 4); + CheckCaseJSON(in_type, out_type, "[1.8446744073709556e+15, -1.8446744073709556e+15]", + R"(["1844674407370955.5712", "-1844674407370955.5712"])"); +} + TEST_F(TestCast, TimestampToTimestamp) { CastOptions options; diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 354f97982be..2e45bb61703 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,115 @@ static const Decimal128 kTenTo36(static_cast(0xC097CE7BC90715), 0xB34B9F1000000000); static const Decimal128 kTenTo18(0xDE0B6B3A7640000); +static constexpr auto kInt64DecimalDigits = + static_cast(std::numeric_limits::digits10); + +static constexpr int64_t kInt64PowersOfTen[kInt64DecimalDigits + 1] = { + // clang-format off + 1LL, + 10LL, + 100LL, + 1000LL, + 10000LL, + 100000LL, + 1000000LL, + 10000000LL, + 100000000LL, + 1000000000LL, + 10000000000LL, + 100000000000LL, + 1000000000000LL, + 10000000000000LL, + 100000000000000LL, + 1000000000000000LL, + 10000000000000000LL, + 100000000000000000LL, + 1000000000000000000LL + // clang-format on +}; + +static constexpr float kFloatPowersOfTen[2 * 38 + 1] = { + 1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, + 1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f, + 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, + 1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f, + 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f, + 1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f, + 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f, + 1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f}; + +static constexpr double kDoublePowersOfTen[2 * 38 + 1] = { + 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, + 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, + 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, + 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, + 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, + 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, + 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38}; + +namespace { + +template +struct Decimal128FromReal { + static Result FromPositiveReal(Real x, int32_t precision, int32_t scale) { + if (scale >= -38 && scale <= 38) { + x *= Derived::powers_of_ten()[scale + 38]; + } else { + x *= std::pow(x, 10); + } + x = std::nearbyint(x); + const auto max_abs = Derived::powers_of_ten()[precision + 38]; + if (x <= -max_abs || x >= max_abs) { + return Status::Invalid("Cannot convert ", x, + " to Decimal128(precision = ", precision, + ", scale = ", scale, "): overflow"); + } + // Extract high and low bits + const auto high = std::floor(std::ldexp(x, -64)); + const auto low = x - std::ldexp(high, 64); + + DCHECK_GE(high, -9.223372036854776e+18); // -2**63 + DCHECK_LT(high, 9.223372036854776e+18); // 2**63 + DCHECK_GE(low, 0); + DCHECK_LT(low, 1.8446744073709552e+19); // 2**64 + return Decimal128(static_cast(high), static_cast(low)); + } + + static Result FromReal(Real x, int32_t precision, int32_t scale) { + DCHECK_GT(precision, 0); + DCHECK_LE(precision, 38); + + if (!std::isfinite(x)) { + return Status::Invalid("Cannot convert ", x, " to Decimal128"); + } + if (x < 0) { + ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale)); + return dec.Negate(); + } else { + // Includes negative zero + return FromPositiveReal(x, precision, scale); + } + } +}; + +struct Decimal128FromFloat : public Decimal128FromReal { + static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; } +}; + +struct Decimal128FromDouble : public Decimal128FromReal { + static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; } +}; + +} // namespace + +Result Decimal128::FromReal(float x, int32_t precision, int32_t scale) { + return Decimal128FromFloat::FromReal(x, precision, scale); +} + +Result Decimal128::FromReal(double x, int32_t precision, int32_t scale) { + return Decimal128FromDouble::FromReal(x, precision, scale); +} + std::string Decimal128::ToIntegerString() const { Decimal128 remainder; std::stringstream buf; @@ -154,35 +264,13 @@ std::string Decimal128::ToString(int32_t scale) const { return "0." + std::string(static_cast(scale - len), '0') + str; } -static constexpr auto kInt64DecimalDigits = - static_cast(std::numeric_limits::digits10); -static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL, - 10LL, - 100LL, - 1000LL, - 10000LL, - 100000LL, - 1000000LL, - 10000000LL, - 100000000LL, - 1000000000LL, - 10000000000LL, - 100000000000LL, - 1000000000000LL, - 10000000000000LL, - 100000000000000LL, - 1000000000000000LL, - 10000000000000000LL, - 100000000000000000LL, - 1000000000000000000LL}; - // Iterates over data and for each group of kInt64DecimalDigits multiple out by // the appropriate power of 10 necessary to add source parsed as uint64 and // then adds the parsed value of source. static inline void ShiftAndAdd(const char* data, size_t length, Decimal128* out) { for (size_t posn = 0; posn < length;) { const size_t group_size = std::min(kInt64DecimalDigits, length - posn); - const int64_t multiple = kPowersOfTen[group_size]; + const int64_t multiple = kInt64PowersOfTen[group_size]; int64_t chunk = 0; ARROW_CHECK(internal::ParseValue(data + posn, group_size, &chunk)); diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index 8ce337fe354..d20a24620e1 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -32,6 +32,8 @@ namespace arrow { /// Represents a signed 128-bit integer in two's complement. /// Calculations wrap around and overflow is ignored. +/// The max decimal precision that can be safely represented is +/// 38 significant digits. /// /// For a discussion of the algorithms, look at Knuth's volume 2, /// Semi-numerical Algorithms section 4.3.1. @@ -101,6 +103,9 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 { static Result FromString(const std::string& s); static Result FromString(const char* s); + static Result FromReal(double real, int32_t precision, int32_t scale); + static Result FromReal(float real, int32_t precision, int32_t scale); + /// \brief Convert from a big-endian byte representation. The length must be /// between 1 and 16. /// \return error status if the length is an invalid value diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index a152c764fb1..60c25c23108 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -349,6 +349,164 @@ TEST(Decimal128ParseTest, WithExponentAndNullptrScale) { ASSERT_OK_AND_EQ(expected_value, Decimal128::FromString("1.23E-8")); } +template +void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale, + const std::string& expected) { + ASSERT_OK_AND_ASSIGN(auto dec, Decimal128::FromReal(real, precision, scale)); + ASSERT_EQ(dec.ToString(scale), expected); +} + +template +struct FromRealTestParam { + Real real; + int32_t precision; + int32_t scale; + std::string expected; +}; + +using FromFloatTestParam = FromRealTestParam; +using FromDoubleTestParam = FromRealTestParam; + +// Common tests for Decimal128::FromReal(T, ...) +template +class TestDecimalFromReal : public ::testing::Test { + public: + using ParamType = FromRealTestParam; + + void TestSuccess() { + const std::vector params{ + // clang-format off + {0.0, 1, 0, "0"}, + {-0.0, 1, 0, "0"}, + {0.0, 19, 4, "0.0000"}, + {-0.0, 19, 4, "0.0000"}, + {123.0, 7, 4, "123.0000"}, + {-123.0, 7, 4, "-123.0000"}, + {456.78, 7, 4, "456.7800"}, + {-456.78, 7, 4, "-456.7800"}, + {456.784, 5, 2, "456.78"}, + {-456.784, 5, 2, "-456.78"}, + {456.786, 5, 2, "456.79"}, + {-456.786, 5, 2, "-456.79"}, + {999.99, 5, 2, "999.99"}, + {-999.99, 5, 2, "-999.99"}, + {123.0, 19, 0, "123"}, + {-123.0, 19, 0, "-123"}, + {123.4, 19, 0, "123"}, + {-123.4, 19, 0, "-123"}, + {123.6, 19, 0, "124"}, + {-123.6, 19, 0, "-124"}, + // 2**62 + {4.611686018427388e+18, 19, 0, "4611686018427387904"}, + {-4.611686018427388e+18, 19, 0, "-4611686018427387904"}, + // 2**63 + {9.223372036854776e+18, 19, 0, "9223372036854775808"}, + {-9.223372036854776e+18, 19, 0, "-9223372036854775808"}, + {9.223372036854776e+17, 19, 1, "922337203685477580.8"}, + {-9.223372036854776e+17, 19, 1, "-922337203685477580.8"}, + // 2**64 + {1.8446744073709552e+19, 20, 0, "18446744073709551616"}, + {-1.8446744073709552e+19, 20, 0, "-18446744073709551616"} + // clang-format on + }; + for (const ParamType& param : params) { + CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected); + } + } + + void TestErrors() { + ASSERT_RAISES(Invalid, Decimal128::FromReal(INFINITY, 19, 4)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(-INFINITY, 19, 4)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(NAN, 19, 4)); + // Overflows + ASSERT_RAISES(Invalid, Decimal128::FromReal(1000.0, 3, 0)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(-1000.0, 3, 0)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(1000.0, 5, 2)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(-1000.0, 5, 2)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(999.996, 5, 2)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(-999.996, 5, 2)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(1e+38, 38, 0)); + ASSERT_RAISES(Invalid, Decimal128::FromReal(-1e+38, 38, 0)); + } +}; + +using RealTypes = ::testing::Types; +TYPED_TEST_SUITE(TestDecimalFromReal, RealTypes); + +TYPED_TEST(TestDecimalFromReal, TestSuccess) { this->TestSuccess(); } + +TYPED_TEST(TestDecimalFromReal, TestErrors) { this->TestErrors(); } + +// Custom test for Decimal128::FromReal(float, ...) +class TestDecimalFromRealFloat : public ::testing::TestWithParam {}; + +TEST_P(TestDecimalFromRealFloat, SuccessConversion) { + const auto param = GetParam(); + CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P( + TestDecimalFromRealFloat, TestDecimalFromRealFloat, + ::testing::Values( + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"}, + FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"}, + FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"}, + FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"}, + // 2**64 - 2**40 (exactly representable in a float) + FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"}, + FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"}, + // 2**64 + 2**41 (exactly representable in a double) + FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"}, + FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"}, + FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"}, + FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"}, + // Almost 10**38 (minus 2**103) + FromFloatTestParam{9.999999e+37f, 38, 0, + "99999986661652122824821048795547566080"}, + FromFloatTestParam{-9.999999e+37f, 38, 0, + "-99999986661652122824821048795547566080"} +)); +// clang-format on + +// Custom test for Decimal128::FromReal(double, ...) +class TestDecimalFromRealDouble : public ::testing::TestWithParam {}; + +TEST_P(TestDecimalFromRealDouble, SuccessConversion) { + const auto param = GetParam(); + CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P( + TestDecimalFromRealDouble, TestDecimalFromRealDouble, + ::testing::Values( + // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision) + FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"}, + FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"}, + FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"}, + FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"}, + // 2**64 - 2**11 (exactly representable in a double) + FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"}, + FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"}, + // 2**64 + 2**11 (exactly representable in a double) + FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"}, + FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"}, + FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"}, + FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"}, + // Almost 10**38 (minus 2**73) + FromDoubleTestParam{9.999999999999998e+37, 38, 0, + "99999999999999978859343891977453174784"}, + FromDoubleTestParam{-9.999999999999998e+37, 38, 0, + "-99999999999999978859343891977453174784"}, + FromDoubleTestParam{9.999999999999998e+27, 38, 10, + "9999999999999997885934389197.7453174784"}, + FromDoubleTestParam{-9.999999999999998e+27, 38, 10, + "-9999999999999997885934389197.7453174784"} +)); +// clang-format on + TEST(Decimal128Test, TestSmallNumberFormat) { Decimal128 value("0.2"); std::string expected("0.2"); From d929e80f688cd4613eeb5a794c657195cefb6135 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 1 Jul 2020 21:40:33 +0200 Subject: [PATCH 2/5] Try to fix CI failures --- .../compute/kernels/scalar_cast_numeric.cc | 15 +++--- cpp/src/arrow/util/decimal_test.cc | 54 +++++++++---------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index b056ff2586b..055afd19792 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -471,7 +471,7 @@ struct CastFunctor { // Real to decimal template -struct SafeRealToDecimal { +struct RealToDecimal { template Decimal128 Call(KernelContext* ctx, RealType val) const { auto result = Decimal128::FromReal(val, out_precision_, out_scale_); @@ -488,6 +488,9 @@ struct SafeRealToDecimal { int32_t out_scale_, out_precision_; }; +using SafeRealToDecimal = RealToDecimal; +using UnsafeRealToDecimal = RealToDecimal; + template struct CastFunctor> { static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { @@ -498,14 +501,12 @@ struct CastFunctor> { const auto out_precision = out_type_inst.precision(); if (options.allow_decimal_truncate) { - using ExecutorType = SafeRealToDecimal; - applicator::ScalarUnaryNotNullStateful kernel( - ExecutorType{out_scale, out_precision}); + applicator::ScalarUnaryNotNullStateful kernel( + SafeRealToDecimal{out_scale, out_precision}); return kernel.Exec(ctx, batch, out); } else { - using ExecutorType = SafeRealToDecimal; - applicator::ScalarUnaryNotNullStateful kernel( - ExecutorType{out_scale, out_precision}); + applicator::ScalarUnaryNotNullStateful + kernel(UnsafeRealToDecimal{out_scale, out_precision}); return kernel.Exec(ctx, batch, out); } } diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 60c25c23108..6a7aa4cc4e5 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -376,37 +376,35 @@ class TestDecimalFromReal : public ::testing::Test { void TestSuccess() { const std::vector params{ // clang-format off - {0.0, 1, 0, "0"}, - {-0.0, 1, 0, "0"}, - {0.0, 19, 4, "0.0000"}, - {-0.0, 19, 4, "0.0000"}, - {123.0, 7, 4, "123.0000"}, - {-123.0, 7, 4, "-123.0000"}, - {456.78, 7, 4, "456.7800"}, - {-456.78, 7, 4, "-456.7800"}, - {456.784, 5, 2, "456.78"}, - {-456.784, 5, 2, "-456.78"}, - {456.786, 5, 2, "456.79"}, - {-456.786, 5, 2, "-456.79"}, - {999.99, 5, 2, "999.99"}, - {-999.99, 5, 2, "-999.99"}, - {123.0, 19, 0, "123"}, - {-123.0, 19, 0, "-123"}, - {123.4, 19, 0, "123"}, - {-123.4, 19, 0, "-123"}, - {123.6, 19, 0, "124"}, - {-123.6, 19, 0, "-124"}, + {0.0f, 1, 0, "0"}, + {-0.0f, 1, 0, "0"}, + {0.0f, 19, 4, "0.0000"}, + {-0.0f, 19, 4, "0.0000"}, + {123.0f, 7, 4, "123.0000"}, + {-123.0f, 7, 4, "-123.0000"}, + {456.78f, 7, 4, "456.7800"}, + {-456.78f, 7, 4, "-456.7800"}, + {456.784f, 5, 2, "456.78"}, + {-456.784f, 5, 2, "-456.78"}, + {456.786f, 5, 2, "456.79"}, + {-456.786f, 5, 2, "-456.79"}, + {999.99f, 5, 2, "999.99"}, + {-999.99f, 5, 2, "-999.99"}, + {123.0f, 19, 0, "123"}, + {-123.0f, 19, 0, "-123"}, + {123.4f, 19, 0, "123"}, + {-123.4f, 19, 0, "-123"}, + {123.6f, 19, 0, "124"}, + {-123.6f, 19, 0, "-124"}, // 2**62 - {4.611686018427388e+18, 19, 0, "4611686018427387904"}, - {-4.611686018427388e+18, 19, 0, "-4611686018427387904"}, + {4.611686e+18f, 19, 0, "4611686018427387904"}, + {-4.611686e+18f, 19, 0, "-4611686018427387904"}, // 2**63 - {9.223372036854776e+18, 19, 0, "9223372036854775808"}, - {-9.223372036854776e+18, 19, 0, "-9223372036854775808"}, - {9.223372036854776e+17, 19, 1, "922337203685477580.8"}, - {-9.223372036854776e+17, 19, 1, "-922337203685477580.8"}, + {9.223372e+18f, 19, 0, "9223372036854775808"}, + {-9.223372e+18f, 19, 0, "-9223372036854775808"}, // 2**64 - {1.8446744073709552e+19, 20, 0, "18446744073709551616"}, - {-1.8446744073709552e+19, 20, 0, "-18446744073709551616"} + {1.8446744e+19f, 20, 0, "18446744073709551616"}, + {-1.8446744e+19f, 20, 0, "-18446744073709551616"} // clang-format on }; for (const ParamType& param : params) { From e7537336b311e05d5d613f8518081b48e38aaa46 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 1 Jul 2020 22:10:30 +0200 Subject: [PATCH 3/5] Fix compilation failure on gcc 4.8.2 --- cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 055afd19792..72ef8bdf128 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -492,7 +492,7 @@ using SafeRealToDecimal = RealToDecimal; using UnsafeRealToDecimal = RealToDecimal; template -struct CastFunctor> { +struct CastFunctor::value>> { static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const auto& options = checked_cast(ctx->state())->options; ArrayData* output = out->mutable_array(); From 8adcb62816abb29c333b773721559681bf45ef54 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 1 Jul 2020 23:20:51 +0200 Subject: [PATCH 4/5] Add a test for extreme values --- cpp/src/arrow/util/decimal.cc | 8 ++++--- cpp/src/arrow/util/decimal_test.cc | 35 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 2e45bb61703..562080323de 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -99,16 +99,18 @@ namespace { template struct Decimal128FromReal { - static Result FromPositiveReal(Real x, int32_t precision, int32_t scale) { + static Result FromPositiveReal(Real real, int32_t precision, + int32_t scale) { + auto x = real; if (scale >= -38 && scale <= 38) { x *= Derived::powers_of_ten()[scale + 38]; } else { - x *= std::pow(x, 10); + x *= std::pow(static_cast(10), static_cast(scale)); } x = std::nearbyint(x); const auto max_abs = Derived::powers_of_ten()[precision + 38]; if (x <= -max_abs || x >= max_abs) { - return Status::Invalid("Cannot convert ", x, + return Status::Invalid("Cannot convert ", real, " to Decimal128(precision = ", precision, ", scale = ", scale, "): overflow"); } diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 6a7aa4cc4e5..e80eaab4f3f 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -356,6 +356,13 @@ void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale, ASSERT_EQ(dec.ToString(scale), expected); } +template +void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale, + const std::string& expected) { + ASSERT_OK_AND_ASSIGN(auto dec, Decimal128::FromReal(real, precision, scale)); + ASSERT_EQ(dec.ToIntegerString(), expected); +} + template struct FromRealTestParam { Real real; @@ -468,6 +475,20 @@ INSTANTIATE_TEST_SUITE_P( )); // clang-format on +TEST(TestDecimalFromRealFloat, LargeValues) { + // Test the entire float range + for (int32_t scale = -38; scale <= 38; ++scale) { + float real = std::pow(10.0f, static_cast(scale)); + CheckDecimalFromRealIntegerString(real, 1, -scale, "1"); + } + for (int32_t scale = -37; scale <= 36; ++scale) { + float real = 123.f * std::pow(10.f, static_cast(scale)); + CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12"); + CheckDecimalFromRealIntegerString(real, 3, -scale, "123"); + CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230"); + } +} + // Custom test for Decimal128::FromReal(double, ...) class TestDecimalFromRealDouble : public ::testing::TestWithParam {}; @@ -505,6 +526,20 @@ INSTANTIATE_TEST_SUITE_P( )); // clang-format on +TEST(TestDecimalFromRealDouble, LargeValues) { + // Test the entire double range + for (int32_t scale = -308; scale <= 308; ++scale) { + double real = std::pow(10.0, static_cast(scale)); + CheckDecimalFromRealIntegerString(real, 1, -scale, "1"); + } + for (int32_t scale = -307; scale <= 306; ++scale) { + double real = 123. * std::pow(10.0, static_cast(scale)); + CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12"); + CheckDecimalFromRealIntegerString(real, 3, -scale, "123"); + CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230"); + } +} + TEST(Decimal128Test, TestSmallNumberFormat) { Decimal128 value("0.2"); std::string expected("0.2"); From 09d926aa52be0b95359bd531c7d6586ca79e76d0 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 2 Jul 2020 15:53:37 -0500 Subject: [PATCH 5/5] Generate only one applicator --- .../compute/kernels/scalar_cast_numeric.cc | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 72ef8bdf128..3b5aac4a8df 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -470,13 +470,12 @@ struct CastFunctor { // ---------------------------------------------------------------------- // Real to decimal -template struct RealToDecimal { template Decimal128 Call(KernelContext* ctx, RealType val) const { auto result = Decimal128::FromReal(val, out_precision_, out_scale_); if (ARROW_PREDICT_FALSE(!result.ok())) { - if (!AllowTruncate) { + if (!allow_truncate_) { ctx->SetStatus(result.status()); } return Decimal128(); // Zero @@ -486,11 +485,9 @@ struct RealToDecimal { } int32_t out_scale_, out_precision_; + bool allow_truncate_; }; -using SafeRealToDecimal = RealToDecimal; -using UnsafeRealToDecimal = RealToDecimal; - template struct CastFunctor::value>> { static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { @@ -500,15 +497,9 @@ struct CastFunctor::value>> { const auto out_scale = out_type_inst.scale(); const auto out_precision = out_type_inst.precision(); - if (options.allow_decimal_truncate) { - applicator::ScalarUnaryNotNullStateful kernel( - SafeRealToDecimal{out_scale, out_precision}); - return kernel.Exec(ctx, batch, out); - } else { - applicator::ScalarUnaryNotNullStateful - kernel(UnsafeRealToDecimal{out_scale, out_precision}); - return kernel.Exec(ctx, batch, out); - } + applicator::ScalarUnaryNotNullStateful kernel( + RealToDecimal{out_scale, out_precision, options.allow_decimal_truncate}); + return kernel.Exec(ctx, batch, out); } };