Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ Result<ValueDescr> ResolveOutputFromOptions(KernelContext* ctx,

ARROW_EXPORT extern OutputType kOutputTargetType;

// Add generic casts to out_ty from:
// - the null type
// - dictionary with out_ty as given value type
// - extension types with a compatible storage type
void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* func);

} // namespace internal
Expand Down
44 changes: 43 additions & 1 deletion cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,42 @@ struct CastFunctor<Decimal128Type, Decimal128Type> {
}
};

// ----------------------------------------------------------------------
// Real to decimal

// Stateful element-wise operator used to cast floating-point values to
// Decimal128 with a fixed output precision and scale.
struct RealToDecimal {
  // Convert `val` through Decimal128::FromReal.  If the conversion fails, the
  // error is recorded on `ctx` unless allow_truncate_ is set; either way a
  // zero decimal is returned for that element.
  template <typename OUT, typename RealType>
  Decimal128 Call(KernelContext* ctx, RealType val) const {
    auto maybe_decimal = Decimal128::FromReal(val, out_precision_, out_scale_);
    if (ARROW_PREDICT_FALSE(!maybe_decimal.ok())) {
      if (!allow_truncate_) {
        ctx->SetStatus(maybe_decimal.status());
      }
      return Decimal128();  // Zero
    }
    return *std::move(maybe_decimal);
  }

  // NOTE: member order matters, the struct is brace-initialized as
  // {scale, precision, allow_truncate}.
  int32_t out_scale_;
  int32_t out_precision_;
  bool allow_truncate_;
};

// Cast kernel from a floating-point input type `I` to Decimal128.
template <typename I>
struct CastFunctor<Decimal128Type, I, enable_if_t<is_floating_type<I>::value>> {
  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    // Truncation behaviour comes from the cast options stored in the kernel state.
    const auto& cast_options = checked_cast<const CastState*>(ctx->state())->options;

    // Target precision and scale are read from the output array's type.
    ArrayData* out_data = out->mutable_array();
    const auto& decimal_type = checked_cast<const Decimal128Type&>(*out_data->type);
    const auto scale = decimal_type.scale();
    const auto precision = decimal_type.precision();

    using Kernel =
        applicator::ScalarUnaryNotNullStateful<Decimal128Type, I, RealToDecimal>;
    Kernel kernel(RealToDecimal{scale, precision, cast_options.allow_decimal_truncate});
    return kernel.Exec(ctx, batch, out);
  }
};

namespace {

template <typename OutType>
Expand Down Expand Up @@ -530,10 +566,16 @@ std::shared_ptr<CastFunction> GetCastToFloating(std::string name) {
std::shared_ptr<CastFunction> GetCastToDecimal() {
OutputType sig_out_ty(ResolveOutputFromOptions);

// Cast to decimal
auto func = std::make_shared<CastFunction>("cast_decimal", Type::DECIMAL);
AddCommonCasts(Type::DECIMAL, sig_out_ty, func.get());

// Cast from floating point
DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
CastFunctor<Decimal128Type, FloatType>::Exec));
DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
CastFunctor<Decimal128Type, DoubleType>::Exec));

// Cast from other decimal
auto exec = CastFunctor<Decimal128Type, Decimal128Type>::Exec;
// We resolve the output type of this kernel from the CastOptions
DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType::Array(Type::DECIMAL)}, sig_out_ty,
Expand Down
88 changes: 73 additions & 15 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,11 @@ class TestCast : public TestBase {
}
}

template <typename InType, typename I_TYPE = typename TestCType<InType>::type>
void CheckFails(const std::shared_ptr<DataType>& in_type,
const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
const std::shared_ptr<DataType>& out_type, const CastOptions& options,
bool check_scalar = true) {
std::shared_ptr<Array> input;
if (is_valid.size() > 0) {
ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
} else {
ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
}
ASSERT_RAISES(Invalid, Cast(*input, out_type, options));
void CheckFails(const Array& input, const std::shared_ptr<DataType>& out_type,
const CastOptions& options, bool check_scalar = true) {
ASSERT_RAISES(Invalid, Cast(input, out_type, options));

if (in_type->id() == Type::DECIMAL || out_type->id() == Type::DECIMAL) {
if (input.type_id() == Type::DECIMAL || out_type->id() == Type::DECIMAL) {
// ARROW-9194
check_scalar = false;
}
Expand All @@ -124,14 +115,28 @@ class TestCast : public TestBase {
// cases we will want to check more precisely
if (check_scalar) {
int64_t num_failing = 0;
for (int64_t i = 0; i < input->length(); ++i) {
auto maybe_out = Cast(*input->GetScalar(i), out_type, options);
for (int64_t i = 0; i < input.length(); ++i) {
auto maybe_out = Cast(*input.GetScalar(i), out_type, options);
num_failing += static_cast<int>(maybe_out.status().IsInvalid());
}
ASSERT_GT(num_failing, 0);
}
}

// Build an input array of `in_type` from `in_values`, using `is_valid` as the
// validity mask when it is non-empty, then delegate to the Array overload of
// CheckFails.
template <typename InType, typename I_TYPE = typename TestCType<InType>::type>
void CheckFails(const std::shared_ptr<DataType>& in_type,
                const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
                const std::shared_ptr<DataType>& out_type, const CastOptions& options,
                bool check_scalar = true) {
  std::shared_ptr<Array> array;
  if (is_valid.empty()) {
    ArrayFromVector<InType, I_TYPE>(in_type, in_values, &array);
  } else {
    ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &array);
  }
  CheckFails(*array, out_type, options, check_scalar);
}

template <typename InType, typename I_TYPE = typename TestCType<InType>::type>
void CheckFails(const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
const std::shared_ptr<DataType>& out_type, const CastOptions& options,
Expand Down Expand Up @@ -202,6 +207,14 @@ class TestCast : public TestBase {
}
}

// Parse `in_json` as an array of `in_type` and delegate to the Array overload
// of CheckFails for a cast to `out_type`.
void CheckFailsJSON(const std::shared_ptr<DataType>& in_type,
                    const std::shared_ptr<DataType>& out_type,
                    const std::string& in_json, bool check_scalar = true,
                    const CastOptions& options = CastOptions()) {
  const std::shared_ptr<Array> parsed = ArrayFromJSON(in_type, in_json);
  CheckFails(*parsed, out_type, options, check_scalar);
}

template <typename SourceType, typename DestType>
void TestCastBinaryToString() {
CastOptions options;
Expand Down Expand Up @@ -369,6 +382,23 @@ class TestCast : public TestBase {

// NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
}

void TestCastFloatingToDecimal(const std::shared_ptr<DataType>& in_type) {
auto out_type = decimal(5, 2);

CheckCaseJSON(in_type, out_type, "[0.0, null, 123.45, 123.456, 999.994]",
R"(["0.00", null, "123.45", "123.46", "999.99"])");

// Overflow
CastOptions options{};
out_type = decimal(5, 2);
CheckFailsJSON(in_type, out_type, "[999.996]", /*check_scalar=*/true, options);

options.allow_decimal_truncate = true;
CheckCaseJSON(in_type, out_type, "[0.0, null, 999.996, 123.45, 999.994]",
R"(["0.00", null, "0.00", "123.45", "999.99"])", /*check_scalar=*/true,
options);
}
};

TEST_F(TestCast, SameTypeZeroCopy) {
Expand Down Expand Up @@ -901,6 +931,34 @@ TEST_F(TestCast, DecimalToDecimal) {
check_truncate(decimal(4, 2), v5, is_valid1, decimal(2, 1), e5);
}

TEST_F(TestCast, FloatToDecimal) {
  auto float_type = float32();

  // Checks shared with the float64 test
  TestCastFloatingToDecimal(float_type);

  // 2**64 + 2**41 (exactly representable as a float)
  auto unscaled_type = decimal(20, 0);
  CheckCaseJSON(float_type, unscaled_type, "[1.8446746e+19, -1.8446746e+19]",
                R"(["18446746272732807168", "-18446746272732807168"])");

  // Same digits with a scale of 4
  auto scaled_type = decimal(20, 4);
  CheckCaseJSON(float_type, scaled_type, "[1.8446746e+15, -1.8446746e+15]",
                R"(["1844674627273280.7168", "-1844674627273280.7168"])");
}

TEST_F(TestCast, DoubleToDecimal) {
  auto double_type = float64();

  // Checks shared with the float32 test
  TestCastFloatingToDecimal(double_type);

  // 2**64 + 2**11 (exactly representable as a double)
  auto unscaled_type = decimal(20, 0);
  CheckCaseJSON(double_type, unscaled_type,
                "[1.8446744073709556e+19, -1.8446744073709556e+19]",
                R"(["18446744073709555712", "-18446744073709555712"])");

  // Same digits with a scale of 4
  auto scaled_type = decimal(20, 4);
  CheckCaseJSON(double_type, scaled_type,
                "[1.8446744073709556e+15, -1.8446744073709556e+15]",
                R"(["1844674407370955.5712", "-1844674407370955.5712"])");
}

TEST_F(TestCast, TimestampToTimestamp) {
CastOptions options;

Expand Down
136 changes: 113 additions & 23 deletions cpp/src/arrow/util/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
Expand Down Expand Up @@ -48,6 +49,117 @@ static const Decimal128 kTenTo36(static_cast<int64_t>(0xC097CE7BC90715),
0xB34B9F1000000000);
static const Decimal128 kTenTo18(0xDE0B6B3A7640000);

// Number of decimal digits that every int64_t value is guaranteed to hold.
static constexpr size_t kInt64DecimalDigits =
    static_cast<size_t>(std::numeric_limits<int64_t>::digits10);

// kInt64PowersOfTen[i] == 10^i for i in [0, kInt64DecimalDigits].
static constexpr int64_t kInt64PowersOfTen[kInt64DecimalDigits + 1] = {
    // clang-format off
    1LL, 10LL, 100LL, 1000LL, 10000LL,
    100000LL, 1000000LL, 10000000LL, 100000000LL,
    1000000000LL, 10000000000LL, 100000000000LL,
    1000000000000LL, 10000000000000LL, 100000000000000LL,
    1000000000000000LL, 10000000000000000LL,
    100000000000000000LL, 1000000000000000000LL
    // clang-format on
};

// Powers of ten as single-precision literals for exponents -38 through 38:
// entry [i] holds 10^(i - 38), so 10^e is found at index e + 38.
// Decimal128FromReal indexes this table with `scale + 38` and `precision + 38`.
static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f,
1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f,
1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f,
1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f};

// Powers of ten as double-precision literals for exponents -38 through 38:
// entry [i] holds 10^(i - 38), so 10^e is found at index e + 38.
// Decimal128FromReal indexes this table with `scale + 38` and `precision + 38`.
static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6,
1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27,
1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38};

namespace {

// Conversion from a binary floating-point type to Decimal128, shared between
// float and double.  `Derived` must provide a static powers_of_ten() returning
// a table of `Real` in which entry [e + 38] holds 10^e for e in [-38, 38].
template <typename Real, typename Derived>
struct Decimal128FromReal {
  // Convert a finite, non-negative `real` to a decimal with the given
  // precision and scale.  Returns Status::Invalid if the scaled and rounded
  // value does not fit in `precision` decimal digits.
  static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
                                             int32_t scale) {
    return FromMagnitude(real, /*original=*/real, precision, scale);
  }

  // Convert `x` to a decimal with the given precision (expected in [1, 38])
  // and scale.  Returns Status::Invalid if `x` is not finite or overflows the
  // requested precision.
  static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
    DCHECK_GT(precision, 0);
    DCHECK_LE(precision, 38);

    if (!std::isfinite(x)) {
      return Status::Invalid("Cannot convert ", x, " to Decimal128");
    }
    if (x < 0) {
      // Convert the magnitude and negate the result.  The signed value is
      // passed along so that an overflow error reports the caller's actual
      // input rather than its absolute value.
      ARROW_ASSIGN_OR_RAISE(auto dec, FromMagnitude(-x, /*original=*/x, precision, scale));
      return dec.Negate();
    } else {
      // Includes negative zero
      return FromMagnitude(x, /*original=*/x, precision, scale);
    }
  }

 private:
  // Shared implementation: convert the non-negative `magnitude`, mentioning
  // `original` (the value the caller asked to convert) in any error message.
  static Result<Decimal128> FromMagnitude(Real magnitude, Real original,
                                          int32_t precision, int32_t scale) {
    // Scale by 10^scale, using the lookup table when the exponent is covered.
    auto x = magnitude;
    if (scale >= -38 && scale <= 38) {
      x *= Derived::powers_of_ten()[scale + 38];
    } else {
      x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
    }
    // Round to an integral value (std::nearbyint uses the current rounding mode).
    x = std::nearbyint(x);
    // The result must be strictly below 10^precision in absolute value.
    const auto max_abs = Derived::powers_of_ten()[precision + 38];
    if (x <= -max_abs || x >= max_abs) {
      return Status::Invalid("Cannot convert ", original,
                             " to Decimal128(precision = ", precision,
                             ", scale = ", scale, "): overflow");
    }
    // Extract high and low bits: high = floor(x / 2^64), low = x - high * 2^64
    const auto high = std::floor(std::ldexp(x, -64));
    const auto low = x - std::ldexp(high, 64);

    DCHECK_GE(high, -9.223372036854776e+18);  // -2**63
    DCHECK_LT(high, 9.223372036854776e+18);   // 2**63
    DCHECK_GE(low, 0);
    DCHECK_LT(low, 1.8446744073709552e+19);  // 2**64
    return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
  }
};

// Binds Decimal128FromReal to `float`, supplying the single-precision
// powers-of-ten table.
struct Decimal128FromFloat : public Decimal128FromReal<float, Decimal128FromFloat> {
static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
};

// Binds Decimal128FromReal to `double`, supplying the double-precision
// powers-of-ten table.
struct Decimal128FromDouble : public Decimal128FromReal<double, Decimal128FromDouble> {
static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
};

} // namespace

// Single-precision overload: forwards to the float instantiation of the
// shared conversion routine.
Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
return Decimal128FromFloat::FromReal(x, precision, scale);
}

// Double-precision overload: forwards to the double instantiation of the
// shared conversion routine.
Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
return Decimal128FromDouble::FromReal(x, precision, scale);
}

std::string Decimal128::ToIntegerString() const {
Decimal128 remainder;
std::stringstream buf;
Expand Down Expand Up @@ -154,35 +266,13 @@ std::string Decimal128::ToString(int32_t scale) const {
return "0." + std::string(static_cast<size_t>(scale - len), '0') + str;
}

static constexpr auto kInt64DecimalDigits =
static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL,
10LL,
100LL,
1000LL,
10000LL,
100000LL,
1000000LL,
10000000LL,
100000000LL,
1000000000LL,
10000000000LL,
100000000000LL,
1000000000000LL,
10000000000000LL,
100000000000000LL,
1000000000000000LL,
10000000000000000LL,
100000000000000000LL,
1000000000000000000LL};

// Iterates over data and for each group of kInt64DecimalDigits multiple out by
// the appropriate power of 10 necessary to add source parsed as uint64 and
// then adds the parsed value of source.
static inline void ShiftAndAdd(const char* data, size_t length, Decimal128* out) {
for (size_t posn = 0; posn < length;) {
const size_t group_size = std::min(kInt64DecimalDigits, length - posn);
const int64_t multiple = kPowersOfTen[group_size];
const int64_t multiple = kInt64PowersOfTen[group_size];
int64_t chunk = 0;
ARROW_CHECK(internal::ParseValue<Int64Type>(data + posn, group_size, &chunk));

Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/util/decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ namespace arrow {

/// Represents a signed 128-bit integer in two's complement.
/// Calculations wrap around and overflow is ignored.
/// The max decimal precision that can be safely represented is
/// 38 significant digits.
///
/// For a discussion of the algorithms, look at Knuth's volume 2,
/// Semi-numerical Algorithms section 4.3.1.
Expand Down Expand Up @@ -101,6 +103,9 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
static Result<Decimal128> FromString(const std::string& s);
static Result<Decimal128> FromString(const char* s);

/// \brief Convert a floating-point value to a Decimal128 with the given
/// precision and scale.
///
/// The value is multiplied by 10^scale and rounded to an integral value
/// before being stored.
/// \return error status if the value is not finite or if the scaled value
/// does not fit in `precision` decimal digits
static Result<Decimal128> FromReal(double real, int32_t precision, int32_t scale);
static Result<Decimal128> FromReal(float real, int32_t precision, int32_t scale);

/// \brief Convert from a big-endian byte representation. The length must be
/// between 1 and 16.
/// \return error status if the length is an invalid value
Expand Down
Loading