From 2c78a979391efbfeb472c5d4f715ad68531abdb0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 5 Jun 2023 19:22:01 +0200 Subject: [PATCH 1/4] GH-35576: [C++] Make Decimal{128,256}::FromReal more accurate The original algorithm for real-to-decimal conversion did its computations in the floating-point domain, accumulating rounding errors especially for large scale or precision values, such as: ``` >>> pa.array([1234567890.]).cast(pa.decimal128(38, 11)) [ 1234567889.99999995904 ] >>> pa.array([1234567890.]).cast(pa.decimal128(38, 12)) [ 1234567890.000000057344 ] ``` The new algorithm strives to avoid precision loss by doing all its computations in the decimal domain. However, negative scales, which are presumably infrequent, fall back on the old algorithm. --- .../arrow/compute/kernels/scalar_cast_test.cc | 4 +- cpp/src/arrow/util/basic_decimal.cc | 408 ++------------ cpp/src/arrow/util/basic_decimal.h | 96 ++-- cpp/src/arrow/util/decimal.cc | 525 +++++++++++------- cpp/src/arrow/util/decimal.h | 50 +- cpp/src/arrow/util/decimal_internal.h | 494 ++++++++++++++++ cpp/src/arrow/util/decimal_test.cc | 306 +++++++--- python/pyarrow/tests/test_compute.py | 125 +++++ 8 files changed, 1275 insertions(+), 733 deletions(-) create mode 100644 cpp/src/arrow/util/decimal_internal.h diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a7613eb2b8e..083a85eb346 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1006,12 +1006,12 @@ TEST(Cast, FloatingToDecimal) { CheckCast(ArrayFromJSON(float32(), "[1.8446746e+15, -1.8446746e+15]"), ArrayFromJSON(decimal_type(20, 4), - R"(["1844674627273280.7168", "-1844674627273280.7168"])")); + R"(["1844674629206016.0000", "-1844674629206016.0000"])")); CheckCast( ArrayFromJSON(float64(), "[1.8446744073709556e+15, -1.8446744073709556e+15]"), ArrayFromJSON(decimal_type(20, 4), - 
R"(["1844674407370955.5712", "-1844674407370955.5712"])")); + R"(["1844674407370955.5000", "-1844674407370955.5000"])")); // Edge cases are tested for Decimal128::FromReal() and Decimal256::FromReal } diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index b8f218ede98..d7affddb473 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -29,6 +29,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/config.h" // for ARROW_USE_NATIVE_INT128 +#include "arrow/util/decimal_internal.h" #include "arrow/util/endian.h" #include "arrow/util/int128_internal.h" #include "arrow/util/int_util_overflow.h" @@ -43,336 +44,15 @@ using internal::SafeSignedAdd; using internal::SafeSignedSubtract; using internal::SubtractWithOverflow; -static const BasicDecimal128 ScaleMultipliers[] = { - BasicDecimal128(1LL), - BasicDecimal128(10LL), - BasicDecimal128(100LL), - BasicDecimal128(1000LL), - BasicDecimal128(10000LL), - BasicDecimal128(100000LL), - BasicDecimal128(1000000LL), - BasicDecimal128(10000000LL), - BasicDecimal128(100000000LL), - BasicDecimal128(1000000000LL), - BasicDecimal128(10000000000LL), - BasicDecimal128(100000000000LL), - BasicDecimal128(1000000000000LL), - BasicDecimal128(10000000000000LL), - BasicDecimal128(100000000000000LL), - BasicDecimal128(1000000000000000LL), - BasicDecimal128(10000000000000000LL), - BasicDecimal128(100000000000000000LL), - BasicDecimal128(1000000000000000000LL), - BasicDecimal128(0LL, 10000000000000000000ULL), - BasicDecimal128(5LL, 7766279631452241920ULL), - BasicDecimal128(54LL, 3875820019684212736ULL), - BasicDecimal128(542LL, 1864712049423024128ULL), - BasicDecimal128(5421LL, 200376420520689664ULL), - BasicDecimal128(54210LL, 2003764205206896640ULL), - BasicDecimal128(542101LL, 1590897978359414784ULL), - BasicDecimal128(5421010LL, 15908979783594147840ULL), - BasicDecimal128(54210108LL, 11515845246265065472ULL), - BasicDecimal128(542101086LL, 
4477988020393345024ULL), - BasicDecimal128(5421010862LL, 7886392056514347008ULL), - BasicDecimal128(54210108624LL, 5076944270305263616ULL), - BasicDecimal128(542101086242LL, 13875954555633532928ULL), - BasicDecimal128(5421010862427LL, 9632337040368467968ULL), - BasicDecimal128(54210108624275LL, 4089650035136921600ULL), - BasicDecimal128(542101086242752LL, 4003012203950112768ULL), - BasicDecimal128(5421010862427522LL, 3136633892082024448ULL), - BasicDecimal128(54210108624275221LL, 12919594847110692864ULL), - BasicDecimal128(542101086242752217LL, 68739955140067328ULL), - BasicDecimal128(5421010862427522170LL, 687399551400673280ULL)}; - -static const BasicDecimal128 ScaleMultipliersHalf[] = { - BasicDecimal128(0ULL), - BasicDecimal128(5ULL), - BasicDecimal128(50ULL), - BasicDecimal128(500ULL), - BasicDecimal128(5000ULL), - BasicDecimal128(50000ULL), - BasicDecimal128(500000ULL), - BasicDecimal128(5000000ULL), - BasicDecimal128(50000000ULL), - BasicDecimal128(500000000ULL), - BasicDecimal128(5000000000ULL), - BasicDecimal128(50000000000ULL), - BasicDecimal128(500000000000ULL), - BasicDecimal128(5000000000000ULL), - BasicDecimal128(50000000000000ULL), - BasicDecimal128(500000000000000ULL), - BasicDecimal128(5000000000000000ULL), - BasicDecimal128(50000000000000000ULL), - BasicDecimal128(500000000000000000ULL), - BasicDecimal128(5000000000000000000ULL), - BasicDecimal128(2LL, 13106511852580896768ULL), - BasicDecimal128(27LL, 1937910009842106368ULL), - BasicDecimal128(271LL, 932356024711512064ULL), - BasicDecimal128(2710LL, 9323560247115120640ULL), - BasicDecimal128(27105LL, 1001882102603448320ULL), - BasicDecimal128(271050LL, 10018821026034483200ULL), - BasicDecimal128(2710505LL, 7954489891797073920ULL), - BasicDecimal128(27105054LL, 5757922623132532736ULL), - BasicDecimal128(271050543LL, 2238994010196672512ULL), - BasicDecimal128(2710505431LL, 3943196028257173504ULL), - BasicDecimal128(27105054312LL, 2538472135152631808ULL), - BasicDecimal128(271050543121LL, 
6937977277816766464ULL), - BasicDecimal128(2710505431213LL, 14039540557039009792ULL), - BasicDecimal128(27105054312137LL, 11268197054423236608ULL), - BasicDecimal128(271050543121376LL, 2001506101975056384ULL), - BasicDecimal128(2710505431213761LL, 1568316946041012224ULL), - BasicDecimal128(27105054312137610LL, 15683169460410122240ULL), - BasicDecimal128(271050543121376108LL, 9257742014424809472ULL), - BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)}; - -#define BasicDecimal256FromLE(v1, v2, v3, v4) \ - BasicDecimal256(bit_util::little_endian::ToNative(v1, v2, v3, v4)) - -static const BasicDecimal256 ScaleMultipliersDecimal256[] = { - BasicDecimal256FromLE({1ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({100000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({3875820019684212736ULL, 54ULL, 0ULL, 
0ULL}), - BasicDecimal256FromLE({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}), - BasicDecimal256FromLE({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}), - BasicDecimal256FromLE( - {4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}), - BasicDecimal256FromLE( - {11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}), - BasicDecimal256FromLE( - {7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}), - BasicDecimal256FromLE( - {80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}), - BasicDecimal256FromLE( - {802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}), 
- BasicDecimal256FromLE( - {8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}), - BasicDecimal256FromLE( - {6450984253743169536ULL, 13015503840481697412ULL, 293873587ULL, 0ULL}), - BasicDecimal256FromLE( - {9169610316303040512ULL, 1027829888850112811ULL, 2938735877ULL, 0ULL}), - BasicDecimal256FromLE( - {17909126868192198656ULL, 10278298888501128114ULL, 29387358770ULL, 0ULL}), - BasicDecimal256FromLE( - {13070572018536022016ULL, 10549268516463523069ULL, 293873587705ULL, 0ULL}), - BasicDecimal256FromLE( - {1578511669393358848ULL, 13258964796087472617ULL, 2938735877055ULL, 0ULL}), - BasicDecimal256FromLE( - {15785116693933588480ULL, 3462439444907864858ULL, 29387358770557ULL, 0ULL}), - BasicDecimal256FromLE( - {10277214349659471872ULL, 16177650375369096972ULL, 293873587705571ULL, 0ULL}), - BasicDecimal256FromLE( - {10538423128046960640ULL, 14202551164014556797ULL, 2938735877055718ULL, 0ULL}), - BasicDecimal256FromLE( - {13150510911921848320ULL, 12898303124178706663ULL, 29387358770557187ULL, 0ULL}), - BasicDecimal256FromLE( - {2377900603251621888ULL, 18302566799529756941ULL, 293873587705571876ULL, 0ULL}), - BasicDecimal256FromLE( - {5332261958806667264ULL, 17004971331911604867ULL, 2938735877055718769ULL, 0ULL}), - BasicDecimal256FromLE( - {16429131440647569408ULL, 4029016655730084128ULL, 10940614696847636083ULL, 1ULL}), - BasicDecimal256FromLE({16717361816799281152ULL, 3396678409881738056ULL, - 17172426599928602752ULL, 15ULL}), - BasicDecimal256FromLE({1152921504606846976ULL, 15520040025107828953ULL, - 5703569335900062977ULL, 159ULL}), - BasicDecimal256FromLE({11529215046068469760ULL, 7626447661401876602ULL, - 1695461137871974930ULL, 1593ULL}), - BasicDecimal256FromLE({4611686018427387904ULL, 2477500319180559562ULL, - 16954611378719749304ULL, 15930ULL}), - BasicDecimal256FromLE({9223372036854775808ULL, 6328259118096044006ULL, - 3525417123811528497ULL, 159309ULL}), - BasicDecimal256FromLE( - {0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 
1593091ULL}), - BasicDecimal256FromLE( - {0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}), - BasicDecimal256FromLE( - {0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}), - BasicDecimal256FromLE( - {0ULL, 10259008136678022120ULL, 2443313256331835254ULL, 1593091911ULL}), - BasicDecimal256FromLE( - {0ULL, 10356360998232463120ULL, 5986388489608800929ULL, 15930919111ULL}), - BasicDecimal256FromLE( - {0ULL, 11329889613776873120ULL, 4523652674959354447ULL, 159309191113ULL}), - BasicDecimal256FromLE( - {0ULL, 2618431695511421504ULL, 8343038602174441244ULL, 1593091911132ULL}), - BasicDecimal256FromLE( - {0ULL, 7737572881404663424ULL, 9643409726906205977ULL, 15930919111324ULL}), - BasicDecimal256FromLE( - {0ULL, 3588752519208427776ULL, 4200376900514301694ULL, 159309191113245ULL}), - BasicDecimal256FromLE( - {0ULL, 17440781118374726144ULL, 5110280857723913709ULL, 1593091911132452ULL}), - BasicDecimal256FromLE( - {0ULL, 8387114520361296896ULL, 14209320429820033867ULL, 15930919111324522ULL}), - BasicDecimal256FromLE( - {0ULL, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}), - BasicDecimal256FromLE( - {0ULL, 8607968719199866880ULL, 532749306367912313ULL, 1593091911132452277ULL})}; - -static const BasicDecimal256 ScaleMultipliersHalfDecimal256[] = { - BasicDecimal256FromLE({0ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({500ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({500000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({500000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50000000000ULL, 0ULL, 0ULL, 0ULL}), - 
BasicDecimal256FromLE({500000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({500000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({50000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({500000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({932356024711512064ULL, 271ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}), - BasicDecimal256FromLE({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 
0ULL}), - BasicDecimal256FromLE({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}), - BasicDecimal256FromLE( - {15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}), - BasicDecimal256FromLE( - {11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}), - BasicDecimal256FromLE( - {5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}), - BasicDecimal256FromLE( - {3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}), - BasicDecimal256FromLE( - {40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}), - BasicDecimal256FromLE( - {401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}), - BasicDecimal256FromLE( - {4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}), - BasicDecimal256FromLE( - {3225492126871584768ULL, 15731123957095624514ULL, 146936793ULL, 0ULL}), - BasicDecimal256FromLE( - {13808177195006296064ULL, 9737286981279832213ULL, 1469367938ULL, 0ULL}), - BasicDecimal256FromLE( - {8954563434096099328ULL, 5139149444250564057ULL, 14693679385ULL, 0ULL}), - BasicDecimal256FromLE( - {15758658046122786816ULL, 14498006295086537342ULL, 146936793852ULL, 0ULL}), - BasicDecimal256FromLE( - {10012627871551455232ULL, 15852854434898512116ULL, 1469367938527ULL, 0ULL}), - BasicDecimal256FromLE( - {7892558346966794240ULL, 10954591759308708237ULL, 14693679385278ULL, 0ULL}), - BasicDecimal256FromLE( - {5138607174829735936ULL, 17312197224539324294ULL, 146936793852785ULL, 0ULL}), - BasicDecimal256FromLE( - {14492583600878256128ULL, 7101275582007278398ULL, 1469367938527859ULL, 0ULL}), - BasicDecimal256FromLE( - {15798627492815699968ULL, 15672523598944129139ULL, 14693679385278593ULL, 0ULL}), - BasicDecimal256FromLE( - {10412322338480586752ULL, 9151283399764878470ULL, 146936793852785938ULL, 0ULL}), - BasicDecimal256FromLE( - {11889503016258109440ULL, 17725857702810578241ULL, 1469367938527859384ULL, 0ULL}), - BasicDecimal256FromLE( - {8214565720323784704ULL, 11237880364719817872ULL, 
14693679385278593849ULL, 0ULL}), - BasicDecimal256FromLE( - {8358680908399640576ULL, 1698339204940869028ULL, 17809585336819077184ULL, 7ULL}), - BasicDecimal256FromLE({9799832789158199296ULL, 16983392049408690284ULL, - 12075156704804807296ULL, 79ULL}), - BasicDecimal256FromLE({5764607523034234880ULL, 3813223830700938301ULL, - 10071102605790763273ULL, 796ULL}), - BasicDecimal256FromLE({2305843009213693952ULL, 1238750159590279781ULL, - 8477305689359874652ULL, 7965ULL}), - BasicDecimal256FromLE({4611686018427387904ULL, 12387501595902797811ULL, - 10986080598760540056ULL, 79654ULL}), - BasicDecimal256FromLE({9223372036854775808ULL, 13194551516770668416ULL, - 17627085619057642486ULL, 796545ULL}), - BasicDecimal256FromLE( - {0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}), - BasicDecimal256FromLE( - {0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}), - BasicDecimal256FromLE( - {0ULL, 5129504068339011060ULL, 10445028665020693435ULL, 796545955ULL}), - BasicDecimal256FromLE( - {0ULL, 14401552535971007368ULL, 12216566281659176272ULL, 7965459555ULL}), - BasicDecimal256FromLE( - {0ULL, 14888316843743212368ULL, 11485198374334453031ULL, 79654595556ULL}), - BasicDecimal256FromLE( - {0ULL, 1309215847755710752ULL, 4171519301087220622ULL, 796545955566ULL}), - BasicDecimal256FromLE( - {0ULL, 13092158477557107520ULL, 4821704863453102988ULL, 7965459555662ULL}), - BasicDecimal256FromLE( - {0ULL, 1794376259604213888ULL, 11323560487111926655ULL, 79654595556622ULL}), - BasicDecimal256FromLE( - {0ULL, 17943762596042138880ULL, 2555140428861956854ULL, 796545955566226ULL}), - BasicDecimal256FromLE( - {0ULL, 13416929297035424256ULL, 7104660214910016933ULL, 7965459555662261ULL}), - BasicDecimal256FromLE( - {0ULL, 5042084454387381248ULL, 15706369927971514489ULL, 79654595556622613ULL}), - BasicDecimal256FromLE( - {0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})}; - -#undef BasicDecimal256FromLE - #ifdef ARROW_USE_NATIVE_INT128 
static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF; #else static constexpr uint64_t kInt32Mask = 0xFFFFFFFF; #endif -// same as ScaleMultipliers[38] - 1 -static constexpr BasicDecimal128 kMaxValue = - BasicDecimal128(5421010862427522170LL, 687399551400673280ULL - 1); - -constexpr int BasicDecimal128::kMaxPrecision; -constexpr int BasicDecimal128::kMaxScale; +// same as kDecimal128PowersOfTen[38] - 1 +static constexpr BasicDecimal128 kMaxDecimal128Value{5421010862427522170LL, + 687399551400673280ULL - 1}; BasicDecimal128& BasicDecimal128::Negate() { uint64_t result_lo = ~low_bits() + 1; @@ -394,7 +74,7 @@ BasicDecimal128 BasicDecimal128::Abs(const BasicDecimal128& in) { bool BasicDecimal128::FitsInPrecision(int32_t precision) const { DCHECK_GT(precision, 0); DCHECK_LE(precision, 38); - return BasicDecimal128::Abs(*this) < ScaleMultipliers[precision]; + return BasicDecimal128::Abs(*this) < kDecimal128PowersOfTen[precision]; } BasicDecimal128& BasicDecimal128::operator+=(const BasicDecimal128& right) { @@ -799,7 +479,7 @@ static DecimalStatus BuildFromArray(BasicDecimal256* value, const uint32_t* arra if (status != DecimalStatus::kSuccess) { return status; } - *value = result_array; + *value = BasicDecimal256(result_array); return DecimalStatus::kSuccess; } @@ -951,14 +631,6 @@ DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor, return DecimalDivide(*this, divisor, result, remainder); } -bool operator==(const BasicDecimal128& left, const BasicDecimal128& right) { - return left.high_bits() == right.high_bits() && left.low_bits() == right.low_bits(); -} - -bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right) { - return !operator==(left, right); -} - bool operator<(const BasicDecimal128& left, const BasicDecimal128& right) { return left.high_bits() < right.high_bits() || (left.high_bits() == right.high_bits() && left.low_bits() < right.low_bits()); @@ -1072,7 +744,7 @@ void BasicDecimal128::GetWholeAndFraction(int scale, 
BasicDecimal128* whole, DCHECK_GE(scale, 0); DCHECK_LE(scale, 38); - BasicDecimal128 multiplier(ScaleMultipliers[scale]); + BasicDecimal128 multiplier(kDecimal128PowersOfTen[scale]); auto s = Divide(multiplier, whole, fraction); DCHECK_EQ(s, DecimalStatus::kSuccess); } @@ -1081,29 +753,29 @@ const BasicDecimal128& BasicDecimal128::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 38); - return ScaleMultipliers[scale]; + return kDecimal128PowersOfTen[scale]; } const BasicDecimal128& BasicDecimal128::GetHalfScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 38); - return ScaleMultipliersHalf[scale]; + return kDecimal128HalfPowersOfTen[scale]; } -const BasicDecimal128& BasicDecimal128::GetMaxValue() { return kMaxValue; } +const BasicDecimal128& BasicDecimal128::GetMaxValue() { return kMaxDecimal128Value; } BasicDecimal128 BasicDecimal128::GetMaxValue(int32_t precision) { DCHECK_GE(precision, 0); DCHECK_LE(precision, 38); - return ScaleMultipliers[precision] - 1; + return kDecimal128PowersOfTen[precision] - 1; } BasicDecimal128 BasicDecimal128::IncreaseScaleBy(int32_t increase_by) const { DCHECK_GE(increase_by, 0); DCHECK_LE(increase_by, 38); - return (*this) * ScaleMultipliers[increase_by]; + return (*this) * kDecimal128PowersOfTen[increase_by]; } BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) const { @@ -1114,13 +786,13 @@ BasicDecimal128 BasicDecimal128::ReduceScaleBy(int32_t reduce_by, bool round) co return *this; } - BasicDecimal128 divisor(ScaleMultipliers[reduce_by]); + BasicDecimal128 divisor(kDecimal128PowersOfTen[reduce_by]); BasicDecimal128 result; BasicDecimal128 remainder; auto s = Divide(divisor, &result, &remainder); DCHECK_EQ(s, DecimalStatus::kSuccess); if (round) { - auto divisor_half = ScaleMultipliersHalf[reduce_by]; + auto divisor_half = kDecimal128HalfPowersOfTen[reduce_by]; if (remainder.Abs() >= divisor_half) { result += Sign(); } @@ -1138,9 +810,6 @@ int32_t 
BasicDecimal128::CountLeadingBinaryZeros() const { } } -constexpr int BasicDecimal256::kMaxPrecision; -constexpr int BasicDecimal256::kMaxScale; - BasicDecimal256& BasicDecimal256::Negate() { auto array_le = bit_util::little_endian::Make(&array_); uint64_t carry = 1; @@ -1188,14 +857,14 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) { if (bits == 0) { return *this; } - int cross_word_shift = bits / 64; - if (static_cast(cross_word_shift) >= array_.size()) { + const int cross_word_shift = bits / 64; + if (cross_word_shift >= kWordWidth) { array_ = {0, 0, 0, 0}; return *this; } uint32_t in_word_shift = bits % 64; auto array_le = bit_util::little_endian::Make(&array_); - for (int i = static_cast(array_.size() - 1); i >= cross_word_shift; i--) { + for (int i = kWordWidth - 1; i >= cross_word_shift; i--) { // Account for shifts larger then 64 bits array_le[i] = array_le[i - cross_word_shift]; array_le[i] <<= in_word_shift; @@ -1209,6 +878,35 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) { return *this; } +BasicDecimal256& BasicDecimal256::operator>>=(uint32_t bits) { + if (bits == 0) { + return *this; + } + const uint64_t extended = + static_cast(static_cast(array_[kHighWordIndex]) >> 63); + const int cross_word_shift = bits / 64; + if (cross_word_shift >= kWordWidth) { + array_.fill(extended); + return *this; + } + const uint32_t in_word_shift = bits % 64; + const auto array_le = little_endian_array(); + // Initialize with sign-extended words + WordArray shifted_le; + shifted_le.fill(extended); + // Iterate from LSW to MSW + for (int i = cross_word_shift; i < kWordWidth; ++i) { + shifted_le[i - cross_word_shift] = array_le[i] >> in_word_shift; + if (in_word_shift != 0) { + const uint64_t carry_bits = (i + 1 < kWordWidth ? 
array_le[i + 1] : extended) + << (64 - in_word_shift); + shifted_le[i - cross_word_shift] |= carry_bits; + } + } + array_ = bit_util::little_endian::ToNative(shifted_le); + return *this; +} + BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) { // Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the // min the negation taking the absolute values here should always be safe. @@ -1240,7 +938,7 @@ BasicDecimal256 BasicDecimal256::IncreaseScaleBy(int32_t increase_by) const { DCHECK_GE(increase_by, 0); DCHECK_LE(increase_by, 76); - return (*this) * ScaleMultipliersDecimal256[increase_by]; + return (*this) * kDecimal256PowersOfTen[increase_by]; } BasicDecimal256 BasicDecimal256::ReduceScaleBy(int32_t reduce_by, bool round) const { @@ -1251,13 +949,13 @@ BasicDecimal256 BasicDecimal256::ReduceScaleBy(int32_t reduce_by, bool round) co return *this; } - BasicDecimal256 divisor(ScaleMultipliersDecimal256[reduce_by]); + BasicDecimal256 divisor(kDecimal256PowersOfTen[reduce_by]); BasicDecimal256 result; BasicDecimal256 remainder; auto s = Divide(divisor, &result, &remainder); DCHECK_EQ(s, DecimalStatus::kSuccess); if (round) { - auto divisor_half = ScaleMultipliersHalfDecimal256[reduce_by]; + auto divisor_half = kDecimal256HalfPowersOfTen[reduce_by]; if (remainder.Abs() >= divisor_half) { result += Sign(); } @@ -1268,27 +966,27 @@ BasicDecimal256 BasicDecimal256::ReduceScaleBy(int32_t reduce_by, bool round) co bool BasicDecimal256::FitsInPrecision(int32_t precision) const { DCHECK_GT(precision, 0); DCHECK_LE(precision, 76); - return BasicDecimal256::Abs(*this) < ScaleMultipliersDecimal256[precision]; + return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision]; } const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); - return ScaleMultipliersDecimal256[scale]; + return kDecimal256PowersOfTen[scale]; } const BasicDecimal256& 
BasicDecimal256::GetHalfScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); - return ScaleMultipliersHalfDecimal256[scale]; + return kDecimal256HalfPowersOfTen[scale]; } BasicDecimal256 BasicDecimal256::GetMaxValue(int32_t precision) { DCHECK_GE(precision, 0); DCHECK_LE(precision, 76); - return ScaleMultipliersDecimal256[precision] + (-1); + return kDecimal256PowersOfTen[precision] + (-1); } BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& right) { diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index b071c5f0ff7..cc13d640c73 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -45,13 +45,16 @@ class ARROW_EXPORT GenericBasicDecimal { #if ARROW_LITTLE_ENDIAN static constexpr int kHighWordIndex = NWORDS - 1; + static constexpr int kLowWordIndex = 0; #else static constexpr int kHighWordIndex = 0; + static constexpr int kLowWordIndex = NWORDS - 1; #endif public: static constexpr int kBitWidth = BIT_WIDTH; static constexpr int kByteWidth = kBitWidth / 8; + static constexpr int kWordWidth = NWORDS; // A constructor tag to introduce a little-endian encoded array static constexpr LittleEndianArrayTag LittleEndianArray{}; @@ -64,8 +67,7 @@ class ARROW_EXPORT GenericBasicDecimal { /// \brief Create a decimal from the two's complement representation. /// /// Input array is assumed to be in native endianness. - constexpr GenericBasicDecimal( - const WordArray& array) noexcept // NOLINT(runtime/explicit) + explicit constexpr GenericBasicDecimal(const WordArray& array) noexcept : array_(array) {} /// \brief Create a decimal from the two's complement representation. @@ -74,6 +76,13 @@ class ARROW_EXPORT GenericBasicDecimal { GenericBasicDecimal(LittleEndianArrayTag, const WordArray& array) noexcept : GenericBasicDecimal(bit_util::little_endian::ToNative(array)) {} + /// \brief Create a decimal from any integer not wider than 64 bits. 
+ template ::value && (sizeof(T) <= sizeof(uint64_t)), T>::type> + constexpr GenericBasicDecimal(T value) noexcept // NOLINT(runtime/explicit) + : array_(WordsFromLowBits(value)) {} + /// \brief Create a decimal from an array of bytes. /// /// Bytes are assumed to be in native-endian byte order. @@ -124,8 +133,37 @@ class ARROW_EXPORT GenericBasicDecimal { bool IsNegative() const { return static_cast(array_[kHighWordIndex]) < 0; } + explicit operator bool() const { return array_ != WordArray{}; } + + friend bool operator==(const GenericBasicDecimal& left, + const GenericBasicDecimal& right) { + return left.array_ == right.array_; + } + + friend bool operator!=(const GenericBasicDecimal& left, + const GenericBasicDecimal& right) { + return left.array_ != right.array_; + } + protected: WordArray array_; + + template + static constexpr uint64_t SignExtend(T low_bits) noexcept { + return low_bits >= T{} ? uint64_t{0} : ~uint64_t{0}; + } + + template + static constexpr WordArray WordsFromLowBits(T low_bits) { + WordArray words{}; + if (low_bits < T{}) { + for (auto& word : words) { + word = ~uint64_t{0}; + } + } + words[kLowWordIndex] = static_cast(low_bits); + return words; + } }; /// Represents a signed 128-bit integer in two's complement. @@ -150,14 +188,6 @@ class ARROW_EXPORT BasicDecimal128 : public GenericBasicDecimal(high), low}) {} #endif - /// \brief Convert any integer value into a BasicDecimal128. - template ::value && (sizeof(T) <= sizeof(uint64_t)), T>::type> - constexpr BasicDecimal128(T value) noexcept // NOLINT(runtime/explicit) - : BasicDecimal128(value >= T{0} ? 
0 : -1, static_cast(value)) { // NOLINT - } - /// \brief Negate the current value (in-place) BasicDecimal128& Negate(); @@ -208,7 +238,9 @@ class ARROW_EXPORT BasicDecimal128 : public GenericBasicDecimal>=(uint32_t bits); BasicDecimal128 operator>>(uint32_t bits) const { @@ -284,8 +316,6 @@ class ARROW_EXPORT BasicDecimal128 : public GenericBasicDecimal(const BasicDecimal128& left, const BasicDecimal128& right); @@ -305,14 +335,6 @@ ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left, const BasicDecimal128& right); class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal { - private: - // Due to a bug in clang, we have to declare the extend method prior to its - // usage. - template - static constexpr uint64_t extend(T low_bits) noexcept { - return low_bits >= T() ? uint64_t{0} : ~uint64_t{0}; - } - public: using GenericBasicDecimal::GenericBasicDecimal; @@ -321,19 +343,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal::value && (sizeof(T) <= sizeof(uint64_t)), T>::type> - constexpr BasicDecimal256(T value) noexcept // NOLINT(runtime/explicit) - : BasicDecimal256(bit_util::little_endian::ToNative( - {static_cast(value), extend(value), extend(value), - extend(value)})) {} - explicit BasicDecimal256(const BasicDecimal128& value) noexcept : BasicDecimal256(bit_util::little_endian::ToNative( {value.low_bits(), static_cast(value.high_bits()), - extend(value.high_bits()), extend(value.high_bits())})) {} + SignExtend(value.high_bits()), SignExtend(value.high_bits())})) {} /// \brief Negate the current value (in-place) BasicDecimal256& Negate(); @@ -403,6 +416,17 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal>=(uint32_t bits); + + BasicDecimal256 operator>>(uint32_t bits) const { + auto res = *this; + res >>= bits; + return res; + } + /// \brief In-place division. 
BasicDecimal256& operator/=(const BasicDecimal256& right); @@ -435,16 +459,6 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal(std::numeric_limits::digits10); - -static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = { - // clang-format off - 1ULL, - 10ULL, - 100ULL, - 1000ULL, - 10000ULL, - 100000ULL, - 1000000ULL, - 10000000ULL, - 100000000ULL, - 1000000000ULL, - 10000000000ULL, - 100000000000ULL, - 1000000000000ULL, - 10000000000000ULL, - 100000000000000ULL, - 1000000000000000ULL, - 10000000000000000ULL, - 100000000000000000ULL, - 1000000000000000000ULL - // clang-format on +struct BaseDecimalRealConversion { + // Return 10**exp, with a fast lookup, assuming `exp` is within bounds + template + static Real PowerOfTen(int32_t exp) { + DCHECK(exp >= -76 && exp <= 76); + return RealTraits::powers_of_ten()[exp + 76]; + } + + // Return 10**exp, with a fast lookup if possible + template + static Real LargePowerOfTen(int32_t exp) { + if (ARROW_PREDICT_TRUE(exp >= -76 && exp <= 76)) { + return RealTraits::powers_of_ten()[exp + 76]; + } else { + return std::pow(static_cast(10), static_cast(exp)); + } + } }; -static constexpr float kFloatPowersOfTen[2 * 38 + 1] = { - 1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, - 1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f, - 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, - 1e-8f, 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, 1e1f, - 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f, 1e11f, - 1e12f, 1e13f, 1e14f, 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f, - 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, 1e29f, 1e30f, 1e31f, - 1e32f, 1e33f, 1e34f, 1e35f, 1e36f, 1e37f, 1e38f}; - -static constexpr double kDoublePowersOfTen[2 * 38 + 1] = { - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, - 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18,
1e-17, - 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, - 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, - 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, - 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, - 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38}; - -// On the Windows R toolchain, INFINITY is double type instead of float -static constexpr float kFloatInf = std::numeric_limits::infinity(); -static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = { - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f, - 1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f, - 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f, - 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, - 1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, - 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f, - 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, - 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, - 1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f, - 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f, - 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, - 1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f, - 1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf, - kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, - kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, - kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, - kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, - kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf}; - -static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = { - 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, - 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, - 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 
1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, - 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, - 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, - 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, - 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, - 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, - 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43, - 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, - 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, - 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76}; +template +struct DecimalRealConversion : public BaseDecimalRealConversion { + using DecimalTypeTraits = DecimalTraits; + static constexpr int kMaxPrecision = DecimalType::kMaxPrecision; + static constexpr int kMaxScale = DecimalType::kMaxScale; -namespace { + static const auto DecimalPowerOfTen(int exp) { + DCHECK(exp >= 0 && exp <= kMaxPrecision); + return DecimalTypeTraits::powers_of_ten()[exp]; + } -template -struct DecimalRealConversion { - static Result FromPositiveReal(Real real, int32_t precision, - int32_t scale) { - auto x = real; - if (scale >= -38 && scale <= 38) { - x *= Derived::powers_of_ten()[scale + 38]; - } else { - x *= std::pow(static_cast(10), static_cast(scale)); + template + static Status OverflowError(Real real, int precision, int scale) { + return Status::Invalid("Cannot convert ", real, " to ", DecimalTypeTraits::kTypeName, + "(precision = ", precision, ", scale = ", scale, + "): overflow"); + } + + template + static Result FromPositiveReal(Real real, int32_t precision, + int32_t scale) { + constexpr int kMantissaBits = RealTraits::kMantissaBits; + constexpr int kMantissaDigits = RealTraits::kMantissaDigits; + + // Problem statement: construct the Decimal with the 
value + // closest to `real * 10^scale`. + if (scale < 0) { + // Negative scales are not handled below, fall back to approx algorithm + return Derived::FromPositiveRealApprox(real, precision, scale); } - x = std::nearbyint(x); - const auto max_abs = Derived::powers_of_ten()[precision + 38]; - if (x <= -max_abs || x >= max_abs) { - return Status::Invalid("Cannot convert ", real, - " to Decimal128(precision = ", precision, - ", scale = ", scale, "): overflow"); + + // 1. Check that `real` is within acceptable bounds. + const Real limit = PowerOfTen(precision - scale); + if (real > limit) { + // Checking the limit early helps ensure the computations below do not + // overflow. + // NOTE: `limit` is allowed here as rounding can make it smaller than + // the theoretical limit (for example, 1.0e23 < 10^23). + return OverflowError(real, precision, scale); } - // Extract high and low bits - const auto high = std::floor(std::ldexp(x, -64)); - const auto low = x - std::ldexp(high, 64); - DCHECK_GE(high, -9.223372036854776e+18); // -2**63 - DCHECK_LT(high, 9.223372036854776e+18); // 2**63 - DCHECK_GE(low, 0); - DCHECK_LT(low, 1.8446744073709552e+19); // 2**64 - return Decimal128(static_cast(high), static_cast(low)); + // 2. Losslessly convert `real` to `mant * 2**k` + int binary_exp = 0; + const Real real_mant = std::frexp(real, &binary_exp); + // `real_mant` is within 0.5 and 1 and has M bits of precision. + // Multiply it by 2^M to get an exact integer. + const uint64_t mant = static_cast(std::ldexp(real_mant, kMantissaBits)); + const int k = binary_exp - kMantissaBits; + // (note that `real = mant * 2^k`) + + // 3. Start with `mant`. + // We want to end up with `real * 10^scale` i.e. `mant * 2^k * 10^scale`. + DecimalType x(mant); + + if (k < 0) { + // k < 0 (i.e. binary_exp < kMantissaBits), is probably the common case + // when converting to decimal. It implies right-shifting by -k bits, + // while multiplying by 10^scale. 
We also must avoid overflow (losing + // bits on the left) and precision loss (losing bits on the right). + int right_shift_by = -k; + int mul_by_ten_to = scale; + + // At this point, `x` has kMantissaDigits significant digits but it can + // fit kMaxPrecision (excluding sign). We can therefore multiply by up + // to 10^(kMaxPrecision - kMantissaDigits). + constexpr int kSafeMulByTenTo = kMaxPrecision - kMantissaDigits; + + if (mul_by_ten_to <= kSafeMulByTenTo) { + // Scale is small enough, so we can do it all at once. + x *= DecimalPowerOfTen(mul_by_ten_to); + x = Derived::RoundedRightShift(x, right_shift_by); + } else { + // Scale is too large, we cannot multiply at once without overflow. + // We use an iterative algorithm which alternately shifts left by + // multiplying by a power of ten, and shifts right by a number of bits. + + // First multiply `x` by as large a power of ten as possible + // without overflowing. + x *= DecimalPowerOfTen(kSafeMulByTenTo); + mul_by_ten_to -= kSafeMulByTenTo; + + // `x` now has full precision. However, we know we'll only + // keep `precision` digits at the end. Extraneous bits/digits + // on the right can be safely shifted away, before multiplying + // again. + // NOTE: if `precision` is the full precision then the algorithm will + // lose the last digit. If `precision` is almost the full precision, + // there can be an off-by-one error due to rounding. + const int mul_step = std::max(1, kMaxPrecision - precision); + + // The running exponent, useful to compute by how much we must + // shift right to make place on the left before the next multiply. + int total_exp = 0; + int total_shift = 0; + while (mul_by_ten_to > 0 && right_shift_by > 0) { + const int exp = std::min(mul_by_ten_to, mul_step); + total_exp += exp; + // The supplementary right shift required so that + // `x * 10^total_exp / 2^total_shift` fits in the decimal. 
+ const int bits = + std::min(right_shift_by, kCeilLog2PowersOfTen[total_exp] - total_shift); + total_shift += bits; + // Right shift to make place on the left, then multiply + x = Derived::RoundedRightShift(x, bits); + right_shift_by -= bits; + // Should not overflow thanks to the precautions taken + x *= DecimalPowerOfTen(exp); + mul_by_ten_to -= exp; + } + if (mul_by_ten_to > 0) { + x *= DecimalPowerOfTen(mul_by_ten_to); + } + if (right_shift_by > 0) { + x = Derived::RoundedRightShift(x, right_shift_by); + } + } + } else { + // k >= 0 implies left-shifting by k bits and multiplying by 10^scale. + // The order of these operations therefore doesn't matter. We know + // we won't overflow because of the limit check above, and we also + // won't lose any significant bits on the right. + x *= DecimalPowerOfTen(scale); + x <<= k; + } + + // Rounding might have pushed `x` just above the max precision, check again + if (!x.FitsInPrecision(precision)) { + return OverflowError(real, precision, scale); + } + return x; } - static Result FromReal(Real x, int32_t precision, int32_t scale) { + template + static Result FromReal(Real x, int32_t precision, int32_t scale) { DCHECK_GT(precision, 0); - DCHECK_LE(precision, 38); + DCHECK_LE(precision, kMaxPrecision); + DCHECK_GE(scale, -kMaxScale); + DCHECK_LE(scale, kMaxScale); if (!std::isfinite(x)) { return Status::Invalid("Cannot convert ", x, " to Decimal128"); } + if (x == 0) { + return DecimalType{}; + } if (x < 0) { ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale)); return dec.Negate(); } else { - // Includes negative zero return FromPositiveReal(x, precision, scale); } } - static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { - Real x = static_cast(decimal.high_bits()) * Derived::two_to_64(); - x += static_cast(decimal.low_bits()); - if (scale >= -38 && scale <= 38) { - x *= Derived::powers_of_ten()[-scale + 38]; + template + static Real ToReal(const DecimalType& decimal, int32_t scale) 
{ + DCHECK_GE(scale, -kMaxScale); + DCHECK_LE(scale, kMaxScale); + if (decimal.IsNegative()) { + // Convert the absolute value to avoid precision loss + auto abs = decimal; + abs.Negate(); + return -Derived::template ToRealPositive(abs, scale); } else { - x *= std::pow(static_cast(10), static_cast(-scale)); + return Derived::template ToRealPositive(decimal, scale); } - return x; } +}; - static Real ToReal(Decimal128 decimal, int32_t scale) { - if (decimal.high_bits() < 0) { - // Convert the absolute value to avoid precision loss - decimal.Negate(); - return -ToRealPositive(decimal, scale); +struct Decimal128RealConversion + : public DecimalRealConversion { + using Base = DecimalRealConversion; + using Base::LargePowerOfTen; + using Base::PowerOfTen; + + // Right shift positive `x` by positive `bits`, rounded half to even + static Decimal128 RoundedRightShift(const Decimal128& x, int bits) { + if (bits == 0) { + return x; + } + int64_t result_hi = x.high_bits(); + uint64_t result_lo = x.low_bits(); + uint64_t shifted = 0; + while (bits >= 64) { + // Retain the information that set bits were shifted right. + // This is important to detect an exact half. + shifted = result_lo | (shifted > 0); + result_lo = result_hi; + result_hi >>= 63; // for sign + bits -= 64; + } + if (bits > 0) { + shifted = (result_lo << (64 - bits)) | (shifted > 0); + result_lo >>= bits; + result_lo |= static_cast(result_hi) << (64 - bits); + result_hi >>= bits; + } + // We almost have our result, but now do the rounding. 
+ constexpr uint64_t kHalf = 0x8000000000000000ULL; + if (shifted > kHalf) { + // Strictly more than half => round up + result_lo += 1; + result_hi += (result_lo == 0); + } else if (shifted == kHalf) { + // Exactly half => round to even + if ((result_lo & 1) != 0) { + result_lo += 1; + result_hi += (result_lo == 0); + } } else { - return ToRealPositive(decimal, scale); + // Strictly less than half => round down } + return Decimal128{result_hi, result_lo}; } -}; -struct DecimalFloatConversion - : public DecimalRealConversion { - static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; } - - static constexpr float two_to_64() { return 1.8446744e+19f; } -}; + template + static Result FromPositiveRealApprox(Real real, int32_t precision, + int32_t scale) { + // Approximate algorithm that operates in the FP domain (thus subject + // to precision loss). + const auto x = std::nearbyint(real * PowerOfTen(scale)); + const auto max_abs = PowerOfTen(precision); + if (x <= -max_abs || x >= max_abs) { + return OverflowError(real, precision, scale); + } + // Extract high and low bits + const auto high = std::floor(std::ldexp(x, -64)); + const auto low = x - std::ldexp(high, 64); -struct DecimalDoubleConversion - : public DecimalRealConversion { - static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; } + DCHECK_GE(high, 0); + DCHECK_LT(high, 9.223372036854776e+18); // 2**63 + DCHECK_GE(low, 0); + DCHECK_LT(low, 1.8446744073709552e+19); // 2**64 + return Decimal128(static_cast(high), static_cast(low)); + } - static constexpr double two_to_64() { return 1.8446744073709552e+19; } + template + static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); + x += static_cast(decimal.low_bits()); + x *= LargePowerOfTen(-scale); + return x; + } }; } // namespace +Decimal128::Decimal128(const std::string& str) : Decimal128() { + *this = 
Decimal128::FromString(str).ValueOrDie(); +} + Result Decimal128::FromReal(float x, int32_t precision, int32_t scale) { - return DecimalFloatConversion::FromReal(x, precision, scale); + return Decimal128RealConversion::FromReal(x, precision, scale); } Result Decimal128::FromReal(double x, int32_t precision, int32_t scale) { - return DecimalDoubleConversion::FromReal(x, precision, scale); + return Decimal128RealConversion::FromReal(x, precision, scale); } float Decimal128::ToFloat(int32_t scale) const { - return DecimalFloatConversion::ToReal(*this, scale); + return Decimal128RealConversion::ToReal(*this, scale); } double Decimal128::ToDouble(int32_t scale) const { - return DecimalDoubleConversion::ToReal(*this, scale); + return Decimal128RealConversion::ToReal(*this, scale); } template @@ -782,22 +879,67 @@ Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const { namespace { -template -struct Decimal256RealConversion { - static Result FromPositiveReal(Real real, int32_t precision, - int32_t scale) { - auto x = real; - if (scale >= -76 && scale <= 76) { - x *= Derived::powers_of_ten()[scale + 76]; +struct Decimal256RealConversion + : public DecimalRealConversion { + using Base = DecimalRealConversion; + using Base::LargePowerOfTen; + using Base::PowerOfTen; + + // Right shift positive `x` by positive `bits`, rounded half to even + static Decimal256 RoundedRightShift(Decimal256 x, int bits) { + if (bits == 0) { + return x; + } + const int cross_word_shift = bits / 64; + if (cross_word_shift >= Decimal256::kWordWidth) { + return Decimal256(); + } + const uint32_t in_word_shift = bits % 64; + const auto array_le = x.little_endian_array(); + Decimal256::WordArray shifted_le{}; + uint64_t shifted_out = 0; + // Iterate from LSW to MSW + for (int i = 0; i < cross_word_shift; ++i) { + // Retain the information that non-zero bits were shifted out. + // This is important for half-to-even rounding. 
+ shifted_out = (shifted_out > 0) | array_le[i]; + } + if (in_word_shift != 0) { + const uint64_t carry_bits = array_le[cross_word_shift] << (64 - in_word_shift); + shifted_out = (shifted_out > 0) | (shifted_out >> in_word_shift) | carry_bits; + } + for (int i = cross_word_shift; i < Decimal256::kWordWidth; ++i) { + shifted_le[i - cross_word_shift] = array_le[i] >> in_word_shift; + if (in_word_shift != 0 && i + 1 < Decimal256::kWordWidth) { + const uint64_t carry_bits = array_le[i + 1] << (64 - in_word_shift); + shifted_le[i - cross_word_shift] |= carry_bits; + } + } + auto result = Decimal256(Decimal256::LittleEndianArray, shifted_le); + + // We almost have our result, but now do the rounding. + constexpr uint64_t kHalf = 0x8000000000000000ULL; + if (shifted_out > kHalf) { + // Strictly more than half => round up + result += 1; + } else if (shifted_out == kHalf) { + // Exactly half => round to even + if ((result.low_bits() & 1) != 0) { + result += 1; + } } else { - x *= std::pow(static_cast(10), static_cast(scale)); + // Strictly less than half => round down } - x = std::nearbyint(x); - const auto max_abs = Derived::powers_of_ten()[precision + 76]; + return result; + } + + template + static Result FromPositiveRealApprox(Real real, int32_t precision, + int32_t scale) { + auto x = std::nearbyint(real * PowerOfTen(scale)); + const auto max_abs = PowerOfTen(precision); if (x >= max_abs) { - return Status::Invalid("Cannot convert ", real, - " to Decimal256(precision = ", precision, - ", scale = ", scale, "): overflow"); + return OverflowError(real, precision, scale); } // Extract parts const auto part3 = std::floor(std::ldexp(x, -192)); @@ -809,95 +951,48 @@ struct Decimal256RealConversion { const auto part0 = x; DCHECK_GE(part3, 0); - DCHECK_LT(part3, 1.8446744073709552e+19); // 2**64 + DCHECK_LT(part3, 9.223372036854776e+18); // 2**63 DCHECK_GE(part2, 0); DCHECK_LT(part2, 1.8446744073709552e+19); // 2**64 DCHECK_GE(part1, 0); DCHECK_LT(part1, 1.8446744073709552e+19); 
// 2**64 DCHECK_GE(part0, 0); DCHECK_LT(part0, 1.8446744073709552e+19); // 2**64 - return Decimal256(bit_util::little_endian::ToNative( - {static_cast(part0), static_cast(part1), - static_cast(part2), static_cast(part3)})); - } - - static Result FromReal(Real x, int32_t precision, int32_t scale) { - DCHECK_GT(precision, 0); - DCHECK_LE(precision, 76); - - if (!std::isfinite(x)) { - return Status::Invalid("Cannot convert ", x, " to Decimal256"); - } - if (x < 0) { - ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale)); - return dec.Negate(); - } else { - // Includes negative zero - return FromPositiveReal(x, precision, scale); - } + return Decimal256(Decimal256::LittleEndianArray, + {static_cast(part0), static_cast(part1), + static_cast(part2), static_cast(part3)}); } + template static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { DCHECK_GE(decimal, 0); Real x = 0; const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); - x += Derived::two_to_192(static_cast(parts_le[3])); - x += Derived::two_to_128(static_cast(parts_le[2])); - x += Derived::two_to_64(static_cast(parts_le[1])); + x += RealTraits::two_to_192(static_cast(parts_le[3])); + x += RealTraits::two_to_128(static_cast(parts_le[2])); + x += RealTraits::two_to_64(static_cast(parts_le[1])); x += static_cast(parts_le[0]); - if (scale >= -76 && scale <= 76) { - x *= Derived::powers_of_ten()[-scale + 76]; - } else { - x *= std::pow(static_cast(10), static_cast(-scale)); - } + x *= LargePowerOfTen(-scale); return x; } - - static Real ToReal(Decimal256 decimal, int32_t scale) { - if (decimal.IsNegative()) { - // Convert the absolute value to avoid precision loss - decimal.Negate(); - return -ToRealPositive(decimal, scale); - } else { - return ToRealPositive(decimal, scale); - } - } -}; - -struct Decimal256FloatConversion - : public Decimal256RealConversion { - static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; } - - static float 
two_to_64(float x) { return x * 1.8446744e+19f; } - static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; } - static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; } -}; - -struct Decimal256DoubleConversion - : public Decimal256RealConversion { - static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; } - - static double two_to_64(double x) { return x * 1.8446744073709552e+19; } - static double two_to_128(double x) { return x * 3.402823669209385e+38; } - static double two_to_192(double x) { return x * 6.277101735386681e+57; } }; } // namespace Result Decimal256::FromReal(float x, int32_t precision, int32_t scale) { - return Decimal256FloatConversion::FromReal(x, precision, scale); + return Decimal256RealConversion::FromReal(x, precision, scale); } Result Decimal256::FromReal(double x, int32_t precision, int32_t scale) { - return Decimal256DoubleConversion::FromReal(x, precision, scale); + return Decimal256RealConversion::FromReal(x, precision, scale); } float Decimal256::ToFloat(int32_t scale) const { - return Decimal256FloatConversion::ToReal(*this, scale); + return Decimal256RealConversion::ToReal(*this, scale); } double Decimal256::ToDouble(int32_t scale) const { - return Decimal256DoubleConversion::ToReal(*this, scale); + return Decimal256RealConversion::ToReal(*this, scale); } std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) { diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index fd42a01ae31..345c74d95b1 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -146,9 +146,15 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 { double ToDouble(int32_t scale) const; /// \brief Convert to a floating-point number (scaled) - template + template >> T ToReal(int32_t scale) const { - return ToRealConversion::ToReal(*this, scale); + static_assert(std::is_same_v || std::is_same_v, + "Unexpected floating-point type"); + if constexpr (std::is_same_v) { + 
return ToFloat(scale); + } else { + return ToDouble(scale); + } } ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, @@ -157,21 +163,6 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 { private: /// Converts internal error code to Status Status ToArrowStatus(DecimalStatus dstatus) const; - - template - struct ToRealConversion {}; -}; - -template <> -struct Decimal128::ToRealConversion { - static float ToReal(const Decimal128& dec, int32_t scale) { return dec.ToFloat(scale); } -}; - -template <> -struct Decimal128::ToRealConversion { - static double ToReal(const Decimal128& dec, int32_t scale) { - return dec.ToDouble(scale); - } }; /// Represents a signed 256-bit integer in two's complement. @@ -262,9 +253,15 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 { double ToDouble(int32_t scale) const; /// \brief Convert to a floating-point number (scaled) - template + template >> T ToReal(int32_t scale) const { - return ToRealConversion::ToReal(*this, scale); + static_assert(std::is_same_v || std::is_same_v, + "Unexpected floating-point type"); + if constexpr (std::is_same_v) { + return ToFloat(scale); + } else { + return ToDouble(scale); + } } ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, @@ -273,21 +270,6 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 { private: /// Converts internal error code to Status Status ToArrowStatus(DecimalStatus dstatus) const; - - template - struct ToRealConversion {}; -}; - -template <> -struct Decimal256::ToRealConversion { - static float ToReal(const Decimal256& dec, int32_t scale) { return dec.ToFloat(scale); } -}; - -template <> -struct Decimal256::ToRealConversion { - static double ToReal(const Decimal256& dec, int32_t scale) { - return dec.ToDouble(scale); - } }; /// For an integer type, return the max number of decimal digits diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h new file mode 100644 index 
00000000000..31309dfc140 --- /dev/null +++ b/cpp/src/arrow/util/decimal_internal.h @@ -0,0 +1,494 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "arrow/type_fwd.h" +#include "arrow/util/basic_decimal.h" +#include "arrow/util/endian.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +constexpr auto kInt64DecimalDigits = + static_cast(std::numeric_limits::digits10); + +constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = { + // clang-format off + 1ULL, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL + // clang-format on +}; + +// On the Windows R toolchain, INFINITY is double type instead of float +constexpr float kFloatInf = std::numeric_limits::infinity(); + +// Attention: these pre-computed constants might not exactly represent their +// decimal counterparts: +// >>> int(1e38) +// 99999999999999997748809823456034029568 + 
+constexpr float kFloatPowersOfTen[2 * 76 + 1] = { + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f, + 1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f, + 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f, + 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, + 1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, + 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f, + 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f, + 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, + 1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f, + 1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f, + 1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f, + 1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f, + 1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf, + kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, + kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, + kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, + kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, + kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf}; + +constexpr double kDoublePowersOfTen[2 * 76 + 1] = { + 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, + 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, + 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, + 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, + 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, + 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, + 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 
+ 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43, + 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, + 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, + 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76}; + +constexpr BasicDecimal128 kDecimal128PowersOfTen[38 + 1] = { + BasicDecimal128(1LL), + BasicDecimal128(10LL), + BasicDecimal128(100LL), + BasicDecimal128(1000LL), + BasicDecimal128(10000LL), + BasicDecimal128(100000LL), + BasicDecimal128(1000000LL), + BasicDecimal128(10000000LL), + BasicDecimal128(100000000LL), + BasicDecimal128(1000000000LL), + BasicDecimal128(10000000000LL), + BasicDecimal128(100000000000LL), + BasicDecimal128(1000000000000LL), + BasicDecimal128(10000000000000LL), + BasicDecimal128(100000000000000LL), + BasicDecimal128(1000000000000000LL), + BasicDecimal128(10000000000000000LL), + BasicDecimal128(100000000000000000LL), + BasicDecimal128(1000000000000000000LL), + BasicDecimal128(0LL, 10000000000000000000ULL), + BasicDecimal128(5LL, 7766279631452241920ULL), + BasicDecimal128(54LL, 3875820019684212736ULL), + BasicDecimal128(542LL, 1864712049423024128ULL), + BasicDecimal128(5421LL, 200376420520689664ULL), + BasicDecimal128(54210LL, 2003764205206896640ULL), + BasicDecimal128(542101LL, 1590897978359414784ULL), + BasicDecimal128(5421010LL, 15908979783594147840ULL), + BasicDecimal128(54210108LL, 11515845246265065472ULL), + BasicDecimal128(542101086LL, 4477988020393345024ULL), + BasicDecimal128(5421010862LL, 7886392056514347008ULL), + BasicDecimal128(54210108624LL, 5076944270305263616ULL), + BasicDecimal128(542101086242LL, 13875954555633532928ULL), + BasicDecimal128(5421010862427LL, 9632337040368467968ULL), + BasicDecimal128(54210108624275LL, 4089650035136921600ULL), + BasicDecimal128(542101086242752LL, 4003012203950112768ULL), + BasicDecimal128(5421010862427522LL, 3136633892082024448ULL), + BasicDecimal128(54210108624275221LL, 12919594847110692864ULL), + 
BasicDecimal128(542101086242752217LL, 68739955140067328ULL), + BasicDecimal128(5421010862427522170LL, 687399551400673280ULL)}; + +constexpr BasicDecimal128 kDecimal128HalfPowersOfTen[] = { + BasicDecimal128(0ULL), + BasicDecimal128(5ULL), + BasicDecimal128(50ULL), + BasicDecimal128(500ULL), + BasicDecimal128(5000ULL), + BasicDecimal128(50000ULL), + BasicDecimal128(500000ULL), + BasicDecimal128(5000000ULL), + BasicDecimal128(50000000ULL), + BasicDecimal128(500000000ULL), + BasicDecimal128(5000000000ULL), + BasicDecimal128(50000000000ULL), + BasicDecimal128(500000000000ULL), + BasicDecimal128(5000000000000ULL), + BasicDecimal128(50000000000000ULL), + BasicDecimal128(500000000000000ULL), + BasicDecimal128(5000000000000000ULL), + BasicDecimal128(50000000000000000ULL), + BasicDecimal128(500000000000000000ULL), + BasicDecimal128(5000000000000000000ULL), + BasicDecimal128(2LL, 13106511852580896768ULL), + BasicDecimal128(27LL, 1937910009842106368ULL), + BasicDecimal128(271LL, 932356024711512064ULL), + BasicDecimal128(2710LL, 9323560247115120640ULL), + BasicDecimal128(27105LL, 1001882102603448320ULL), + BasicDecimal128(271050LL, 10018821026034483200ULL), + BasicDecimal128(2710505LL, 7954489891797073920ULL), + BasicDecimal128(27105054LL, 5757922623132532736ULL), + BasicDecimal128(271050543LL, 2238994010196672512ULL), + BasicDecimal128(2710505431LL, 3943196028257173504ULL), + BasicDecimal128(27105054312LL, 2538472135152631808ULL), + BasicDecimal128(271050543121LL, 6937977277816766464ULL), + BasicDecimal128(2710505431213LL, 14039540557039009792ULL), + BasicDecimal128(27105054312137LL, 11268197054423236608ULL), + BasicDecimal128(271050543121376LL, 2001506101975056384ULL), + BasicDecimal128(2710505431213761LL, 1568316946041012224ULL), + BasicDecimal128(27105054312137610LL, 15683169460410122240ULL), + BasicDecimal128(271050543121376108LL, 9257742014424809472ULL), + BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)}; + +#if ARROW_LITTLE_ENDIAN +#define 
BasicDecimal256FromLE(v1, v2, v3, v4) \ + BasicDecimal256(std::array{v1, v2, v3, v4}) +#else +#define BasicDecimal256FromLE(v1, v2, v3, v4) \ + BasicDecimal256(std::array{v4, v3, v2, v1}) +#endif + +constexpr BasicDecimal256 kDecimal256PowersOfTen[76 + 1] = { + BasicDecimal256FromLE({1ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({100000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({3875820019684212736ULL, 54ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}), + 
BasicDecimal256FromLE({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}), + BasicDecimal256FromLE({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}), + BasicDecimal256FromLE( + {4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}), + BasicDecimal256FromLE( + {11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}), + BasicDecimal256FromLE( + {7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}), + BasicDecimal256FromLE( + {80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}), + BasicDecimal256FromLE( + {802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}), + BasicDecimal256FromLE( + {8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}), + BasicDecimal256FromLE( + {6450984253743169536ULL, 13015503840481697412ULL, 293873587ULL, 0ULL}), + BasicDecimal256FromLE( + {9169610316303040512ULL, 1027829888850112811ULL, 2938735877ULL, 0ULL}), + BasicDecimal256FromLE( + {17909126868192198656ULL, 10278298888501128114ULL, 
29387358770ULL, 0ULL}), + BasicDecimal256FromLE( + {13070572018536022016ULL, 10549268516463523069ULL, 293873587705ULL, 0ULL}), + BasicDecimal256FromLE( + {1578511669393358848ULL, 13258964796087472617ULL, 2938735877055ULL, 0ULL}), + BasicDecimal256FromLE( + {15785116693933588480ULL, 3462439444907864858ULL, 29387358770557ULL, 0ULL}), + BasicDecimal256FromLE( + {10277214349659471872ULL, 16177650375369096972ULL, 293873587705571ULL, 0ULL}), + BasicDecimal256FromLE( + {10538423128046960640ULL, 14202551164014556797ULL, 2938735877055718ULL, 0ULL}), + BasicDecimal256FromLE( + {13150510911921848320ULL, 12898303124178706663ULL, 29387358770557187ULL, 0ULL}), + BasicDecimal256FromLE( + {2377900603251621888ULL, 18302566799529756941ULL, 293873587705571876ULL, 0ULL}), + BasicDecimal256FromLE( + {5332261958806667264ULL, 17004971331911604867ULL, 2938735877055718769ULL, 0ULL}), + BasicDecimal256FromLE( + {16429131440647569408ULL, 4029016655730084128ULL, 10940614696847636083ULL, 1ULL}), + BasicDecimal256FromLE({16717361816799281152ULL, 3396678409881738056ULL, + 17172426599928602752ULL, 15ULL}), + BasicDecimal256FromLE({1152921504606846976ULL, 15520040025107828953ULL, + 5703569335900062977ULL, 159ULL}), + BasicDecimal256FromLE({11529215046068469760ULL, 7626447661401876602ULL, + 1695461137871974930ULL, 1593ULL}), + BasicDecimal256FromLE({4611686018427387904ULL, 2477500319180559562ULL, + 16954611378719749304ULL, 15930ULL}), + BasicDecimal256FromLE({9223372036854775808ULL, 6328259118096044006ULL, + 3525417123811528497ULL, 159309ULL}), + BasicDecimal256FromLE( + {0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 1593091ULL}), + BasicDecimal256FromLE( + {0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}), + BasicDecimal256FromLE( + {0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}), + BasicDecimal256FromLE( + {0ULL, 10259008136678022120ULL, 2443313256331835254ULL, 1593091911ULL}), + BasicDecimal256FromLE( + {0ULL, 10356360998232463120ULL, 
5986388489608800929ULL, 15930919111ULL}), + BasicDecimal256FromLE( + {0ULL, 11329889613776873120ULL, 4523652674959354447ULL, 159309191113ULL}), + BasicDecimal256FromLE( + {0ULL, 2618431695511421504ULL, 8343038602174441244ULL, 1593091911132ULL}), + BasicDecimal256FromLE( + {0ULL, 7737572881404663424ULL, 9643409726906205977ULL, 15930919111324ULL}), + BasicDecimal256FromLE( + {0ULL, 3588752519208427776ULL, 4200376900514301694ULL, 159309191113245ULL}), + BasicDecimal256FromLE( + {0ULL, 17440781118374726144ULL, 5110280857723913709ULL, 1593091911132452ULL}), + BasicDecimal256FromLE( + {0ULL, 8387114520361296896ULL, 14209320429820033867ULL, 15930919111324522ULL}), + BasicDecimal256FromLE( + {0ULL, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}), + BasicDecimal256FromLE( + {0ULL, 8607968719199866880ULL, 532749306367912313ULL, 1593091911132452277ULL})}; + +constexpr BasicDecimal256 kDecimal256HalfPowersOfTen[] = { + BasicDecimal256FromLE({0ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({500ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({500000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({500000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({500000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({500000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({50000000000000000ULL, 0ULL, 0ULL, 0ULL}), 
+ BasicDecimal256FromLE({500000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({932356024711512064ULL, 271ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 0ULL}), + BasicDecimal256FromLE({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}), + BasicDecimal256FromLE( + {15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}), + BasicDecimal256FromLE( + {11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}), + BasicDecimal256FromLE( + {5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}), + 
BasicDecimal256FromLE( + {3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}), + BasicDecimal256FromLE( + {40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}), + BasicDecimal256FromLE( + {401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}), + BasicDecimal256FromLE( + {4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}), + BasicDecimal256FromLE( + {3225492126871584768ULL, 15731123957095624514ULL, 146936793ULL, 0ULL}), + BasicDecimal256FromLE( + {13808177195006296064ULL, 9737286981279832213ULL, 1469367938ULL, 0ULL}), + BasicDecimal256FromLE( + {8954563434096099328ULL, 5139149444250564057ULL, 14693679385ULL, 0ULL}), + BasicDecimal256FromLE( + {15758658046122786816ULL, 14498006295086537342ULL, 146936793852ULL, 0ULL}), + BasicDecimal256FromLE( + {10012627871551455232ULL, 15852854434898512116ULL, 1469367938527ULL, 0ULL}), + BasicDecimal256FromLE( + {7892558346966794240ULL, 10954591759308708237ULL, 14693679385278ULL, 0ULL}), + BasicDecimal256FromLE( + {5138607174829735936ULL, 17312197224539324294ULL, 146936793852785ULL, 0ULL}), + BasicDecimal256FromLE( + {14492583600878256128ULL, 7101275582007278398ULL, 1469367938527859ULL, 0ULL}), + BasicDecimal256FromLE( + {15798627492815699968ULL, 15672523598944129139ULL, 14693679385278593ULL, 0ULL}), + BasicDecimal256FromLE( + {10412322338480586752ULL, 9151283399764878470ULL, 146936793852785938ULL, 0ULL}), + BasicDecimal256FromLE( + {11889503016258109440ULL, 17725857702810578241ULL, 1469367938527859384ULL, 0ULL}), + BasicDecimal256FromLE( + {8214565720323784704ULL, 11237880364719817872ULL, 14693679385278593849ULL, 0ULL}), + BasicDecimal256FromLE( + {8358680908399640576ULL, 1698339204940869028ULL, 17809585336819077184ULL, 7ULL}), + BasicDecimal256FromLE({9799832789158199296ULL, 16983392049408690284ULL, + 12075156704804807296ULL, 79ULL}), + BasicDecimal256FromLE({5764607523034234880ULL, 3813223830700938301ULL, + 10071102605790763273ULL, 796ULL}), + 
BasicDecimal256FromLE({2305843009213693952ULL, 1238750159590279781ULL, + 8477305689359874652ULL, 7965ULL}), + BasicDecimal256FromLE({4611686018427387904ULL, 12387501595902797811ULL, + 10986080598760540056ULL, 79654ULL}), + BasicDecimal256FromLE({9223372036854775808ULL, 13194551516770668416ULL, + 17627085619057642486ULL, 796545ULL}), + BasicDecimal256FromLE( + {0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}), + BasicDecimal256FromLE( + {0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}), + BasicDecimal256FromLE( + {0ULL, 5129504068339011060ULL, 10445028665020693435ULL, 796545955ULL}), + BasicDecimal256FromLE( + {0ULL, 14401552535971007368ULL, 12216566281659176272ULL, 7965459555ULL}), + BasicDecimal256FromLE( + {0ULL, 14888316843743212368ULL, 11485198374334453031ULL, 79654595556ULL}), + BasicDecimal256FromLE( + {0ULL, 1309215847755710752ULL, 4171519301087220622ULL, 796545955566ULL}), + BasicDecimal256FromLE( + {0ULL, 13092158477557107520ULL, 4821704863453102988ULL, 7965459555662ULL}), + BasicDecimal256FromLE( + {0ULL, 1794376259604213888ULL, 11323560487111926655ULL, 79654595556622ULL}), + BasicDecimal256FromLE( + {0ULL, 17943762596042138880ULL, 2555140428861956854ULL, 796545955566226ULL}), + BasicDecimal256FromLE( + {0ULL, 13416929297035424256ULL, 7104660214910016933ULL, 7965459555662261ULL}), + BasicDecimal256FromLE( + {0ULL, 5042084454387381248ULL, 15706369927971514489ULL, 79654595556622613ULL}), + BasicDecimal256FromLE( + {0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})}; + +#undef BasicDecimal256FromLE + +constexpr int kCeilLog2PowersOfTen[76 + 1] = { + 0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40, 44, 47, 50, + 54, 57, 60, 64, 67, 70, 74, 77, 80, 84, 87, 90, 94, 97, 100, 103, + 107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157, + 160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210, + 213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 
246, 250, 253}; + +template +struct RealTraits {}; + +template <> +struct RealTraits { + static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; } + + static constexpr float two_to_64(float x) { return x * 1.8446744e+19f; } + static constexpr float two_to_128(float x) { return x == 0 ? 0 : kFloatInf; } + static constexpr float two_to_192(float x) { return x == 0 ? 0 : kFloatInf; } + + static constexpr int kMantissaBits = 24; + // ceil(log10(2 ^ kMantissaBits)) + static constexpr int kMantissaDigits = 8; +}; + +template <> +struct RealTraits { + static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; } + + static constexpr double two_to_64(double x) { return x * 1.8446744073709552e+19; } + static constexpr double two_to_128(double x) { return x * 3.402823669209385e+38; } + static constexpr double two_to_192(double x) { return x * 6.277101735386681e+57; } + + static constexpr int kMantissaBits = 53; + // ceil(log10(2 ^ kMantissaBits)) + static constexpr int kMantissaDigits = 16; +}; + +template +struct DecimalTraits {}; + +template <> +struct DecimalTraits { + static constexpr const BasicDecimal128* powers_of_ten() { + return kDecimal128PowersOfTen; + } + + static constexpr int kMaxPrecision = BasicDecimal128::kMaxPrecision; + static constexpr const char* kTypeName = "Decimal128"; +}; + +template <> +struct DecimalTraits { + static constexpr const BasicDecimal256* powers_of_ten() { + return kDecimal256PowersOfTen; + } + + static constexpr int kMaxPrecision = BasicDecimal256::kMaxPrecision; + static constexpr const char* kTypeName = "Decimal256"; +}; + +template <> +struct DecimalTraits : public DecimalTraits {}; +template <> +struct DecimalTraits : public DecimalTraits {}; + +} // namespace arrow diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index ebba3029c3c..1401750ce76 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -19,9 +19,11 @@ #include
#include #include +#include #include #include #include +#include #include #include @@ -51,6 +53,10 @@ using DecimalTypes = ::testing::Types; static const int128_t kInt128Max = (static_cast(INT64_MAX) << 64) + static_cast(UINT64_MAX); +void PrintTo(const BasicDecimal128& v, std::ostream* os) { *os << Decimal128(v); } + +void PrintTo(const BasicDecimal256& v, std::ostream* os) { *os << Decimal256(v); } + template void AssertDecimalFromString(const std::string& s, const DecimalType& expected, int32_t expected_precision, int32_t expected_scale) { @@ -753,7 +759,7 @@ TEST_P(Decimal128ToStringTest, ToString) { const ToStringTestParam& param = GetParam(); const Decimal128 value(param.test_value); const std::string printed_value = value.ToString(param.scale); - ASSERT_EQ(param.expected_string, printed_value); + EXPECT_EQ(param.expected_string, printed_value); } INSTANTIATE_TEST_SUITE_P(Decimal128ToStringTest, Decimal128ToStringTest, @@ -763,14 +769,20 @@ template void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale, const std::string& expected) { ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale)); - ASSERT_EQ(dec.ToString(scale), expected); + EXPECT_EQ(dec.ToString(scale), expected); + const std::string expected_neg = (dec) ? "-" + expected : expected; + ASSERT_OK_AND_ASSIGN(dec, Decimal::FromReal(-real, precision, scale)); + EXPECT_EQ(dec.ToString(scale), expected_neg); } template void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale, const std::string& expected) { ASSERT_OK_AND_ASSIGN(auto dec, Decimal::FromReal(real, precision, scale)); - ASSERT_EQ(dec.ToIntegerString(), expected); + EXPECT_EQ(dec.ToIntegerString(), expected); + const std::string expected_neg = (dec) ? 
"-" + expected : expected; + ASSERT_OK_AND_ASSIGN(dec, Decimal::FromReal(-real, precision, scale)); + EXPECT_EQ(dec.ToIntegerString(), expected_neg); } template @@ -802,34 +814,21 @@ class TestDecimalFromReal : public ::testing::Test { const std::vector params{ // clang-format off {0.0f, 1, 0, "0"}, - {-0.0f, 1, 0, "0"}, {0.0f, 19, 4, "0.0000"}, - {-0.0f, 19, 4, "0.0000"}, {123.0f, 7, 4, "123.0000"}, - {-123.0f, 7, 4, "-123.0000"}, {456.78f, 7, 4, "456.7800"}, - {-456.78f, 7, 4, "-456.7800"}, {456.784f, 5, 2, "456.78"}, - {-456.784f, 5, 2, "-456.78"}, {456.786f, 5, 2, "456.79"}, - {-456.786f, 5, 2, "-456.79"}, {999.99f, 5, 2, "999.99"}, - {-999.99f, 5, 2, "-999.99"}, {123.0f, 19, 0, "123"}, - {-123.0f, 19, 0, "-123"}, {123.4f, 19, 0, "123"}, - {-123.4f, 19, 0, "-123"}, {123.6f, 19, 0, "124"}, - {-123.6f, 19, 0, "-124"}, // 2**62 {4.6116860184273879e+18, 19, 0, "4611686018427387904"}, - {-4.6116860184273879e+18, 19, 0, "-4611686018427387904"}, // 2**63 {9.2233720368547758e+18, 19, 0, "9223372036854775808"}, - {-9.2233720368547758e+18, 19, 0, "-9223372036854775808"}, // 2**64 {1.8446744073709552e+19, 20, 0, "18446744073709551616"}, - {-1.8446744073709552e+19, 20, 0, "-18446744073709551616"} // clang-format on }; for (const ParamType& param : params) { @@ -849,8 +848,8 @@ class TestDecimalFromReal : public ::testing::Test { ASSERT_RAISES(Invalid, Decimal::FromReal(-1000.0, 5, 2)); ASSERT_RAISES(Invalid, Decimal::FromReal(999.996, 5, 2)); ASSERT_RAISES(Invalid, Decimal::FromReal(-999.996, 5, 2)); - ASSERT_RAISES(Invalid, Decimal::FromReal(1e+38, 38, 0)); - ASSERT_RAISES(Invalid, Decimal::FromReal(-1e+38, 38, 0)); + ASSERT_RAISES(Invalid, Decimal::FromReal(1e+36, 36, 0)); + ASSERT_RAISES(Invalid, Decimal::FromReal(-1e+36, 36, 0)); } }; @@ -868,24 +867,29 @@ template class TestDecimalFromRealFloat : public ::testing::Test { protected: std::vector GetValues() { - return {// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - 
FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"}, - FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"}, - FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"}, - FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"}, - // 2**64 - 2**40 (exactly representable in a float) - FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"}, - FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"}, - // 2**64 + 2**41 (exactly representable in a float) - FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"}, - FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"}, - FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"}, - FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"}, - // Almost 10**38 (minus 2**103) - FromFloatTestParam{9.999999e+37f, 38, 0, - "99999986661652122824821048795547566080"}, - FromFloatTestParam{-9.999999e+37f, 38, 0, - "-99999986661652122824821048795547566080"}}; + return { + // -- Stress the 24 bits of precision of a float + // 2**63 + 2**40 + FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"}, + // 2**64 - 2**40 + FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"}, + // 2**64 + 2**41 + FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"}, + // 2**14 - 2**-10 + FromFloatTestParam{16383.999f, 8, 3, "16383.999"}, + FromFloatTestParam{16383.999f, 19, 3, "16383.999"}, + // 1 - 2**-24 + FromFloatTestParam{0.99999994f, 10, 10, "0.9999999404"}, + FromFloatTestParam{0.99999994f, 16, 16, "0.9999999403953552"}, + FromFloatTestParam{0.99999994f, 20, 20, "0.99999994039535522461"}, + FromFloatTestParam{0.99999994f, 21, 21, "0.999999940395355224609"}, + FromFloatTestParam{0.99999994f, 38, 38, + "0.99999994039535522460937500000000000000"}, + // -- Other cases + // 10**38 - 2**103 + FromFloatTestParam{9.999999e+37f, 38, 0, + "99999986661652122824821048795547566080"}, + }; } }; 
TYPED_TEST_SUITE(TestDecimalFromRealFloat, DecimalTypes); @@ -916,28 +920,92 @@ template class TestDecimalFromRealDouble : public ::testing::Test { protected: std::vector GetValues() { - return {// 2**63 + 2**11 (exactly representable in a double's 53 bits of precision) - FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"}, - FromDoubleTestParam{-9.223372036854778e+18, 19, 0, "-9223372036854777856"}, - FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777856"}, - FromDoubleTestParam{-9.223372036854778e+10, 19, 8, "-92233720368.54777856"}, - // 2**64 - 2**11 (exactly representable in a double) - FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"}, - FromDoubleTestParam{-1.844674407370955e+19, 20, 0, "-18446744073709549568"}, - // 2**64 + 2**11 (exactly representable in a double) - FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"}, - FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, "-18446744073709555712"}, - FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5712"}, - FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, "-1844674407370955.5712"}, - // Almost 10**38 (minus 2**73) - FromDoubleTestParam{9.999999999999998e+37, 38, 0, - "99999999999999978859343891977453174784"}, - FromDoubleTestParam{-9.999999999999998e+37, 38, 0, - "-99999999999999978859343891977453174784"}, - FromDoubleTestParam{9.999999999999998e+27, 38, 10, - "9999999999999997885934389197.7453174784"}, - FromDoubleTestParam{-9.999999999999998e+27, 38, 10, - "-9999999999999997885934389197.7453174784"}}; + std::vector values = { + // -- Stress the 53 bits of precision of a double + // 2**63 + 2**11 + FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"}, + // 2**64 - 2**11 + FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"}, + // 2**64 + 2**11 + FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"}, + // 2**126 + 
FromDoubleTestParam{8.507059173023462e+37, 38, 0, + "85070591730234615865843651857942052864"}, + // 2**126 - 2**74 + FromDoubleTestParam{8.50705917302346e+37, 38, 0, + "85070591730234596976377720379361198080"}, + // 2**36 - 2**-16 + FromDoubleTestParam{68719476735.999985, 11, 0, "68719476736"}, + FromDoubleTestParam{68719476735.999985, 38, 27, + "68719476735.999984741210937500000000000"}, + // -- Other cases + // Almost 10**38 (minus 2**73) + FromDoubleTestParam{9.999999999999998e+37, 38, 0, + "99999999999999978859343891977453174784"}, + FromDoubleTestParam{9.999999999999998e+27, 38, 10, + "9999999999999997384096481280.0000000000"}, + // 10**N (sometimes fits in N digits) + FromDoubleTestParam{1e23, 23, 0, "99999999999999991611392"}, + FromDoubleTestParam{1e23, 24, 1, "99999999999999991611392.0"}, + FromDoubleTestParam{1e36, 37, 0, "1000000000000000042420637374017961984"}, + FromDoubleTestParam{1e36, 38, 1, "1000000000000000042420637374017961984.0"}, + FromDoubleTestParam{1e37, 37, 0, "9999999999999999538762658202121142272"}, + FromDoubleTestParam{1e37, 38, 1, "9999999999999999538762658202121142272.0"}, + FromDoubleTestParam{1e38, 38, 0, "99999999999999997748809823456034029568"}, + // Hand-picked test cases that can involve precision issues. + // More comprehensive testing is done in the PyArrow test suite. 
+ FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777527"}, + FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5000"}, + FromDoubleTestParam{999999999999999.0, 16, 1, "999999999999999.0"}, + FromDoubleTestParam{9999999999999998.0, 17, 1, "9999999999999998.0"}, + FromDoubleTestParam{999999999999999.9, 16, 1, "999999999999999.9"}, + FromDoubleTestParam{9999999987., 38, 22, "9999999987.0000000000000000000000"}, + FromDoubleTestParam{9999999987., 38, 28, + "9999999987.0000000000000000000000000000"}, + }; + std::vector type_dependent_values; + if (std::is_same_v) { + // clang-format off + type_dependent_values = { + // 1 - 2**-52 + // XXX the result should be 0.99999999999999977795539507496869191527 + // but our algorithm loses the right digit. + FromDoubleTestParam{0.9999999999999998, 38, 38, + "0.99999999999999977795539507496869191520"}, + FromDoubleTestParam{0.9999999999999998, 20, 20, "0.99999999999999977796"}, + FromDoubleTestParam{0.9999999999999998, 16, 16, "0.9999999999999998"}, + }; + // clang-format on + } else { + // clang-format off + type_dependent_values = { + // 1 - 2**-52 + FromDoubleTestParam{ + 0.9999999999999998, 76, 76, + ("0.9999999999999997779553950749686919152736663818359375" + "000000000000000000000000")}, + FromDoubleTestParam{0.9999999999999998, 76, 52, + "0.9999999999999997779553950749686919152736663818359375"}, + FromDoubleTestParam{0.9999999999999998, 76, 51, + "0.999999999999999777955395074968691915273666381835938"}, + FromDoubleTestParam{0.9999999999999998, 38, 38, + "0.99999999999999977795539507496869191527"}, + FromDoubleTestParam{0.9999999999999998, 20, 20, "0.99999999999999977796"}, + FromDoubleTestParam{0.9999999999999998, 16, 16, "0.9999999999999998"}, + // Almost 10**76 + FromDoubleTestParam{9.999999999999999e+75, 76, 0, + "999999999999999886366330070006442034959750906670402" + "8242075715752105414230016"}, + FromDoubleTestParam{ + 9.999999999999999e+65, 76, 10, + 
("999999999999999945322333868247445125709646570021247924665841614848" + ".0000000000")}, + }; + // clang-format on + } + values.insert(values.end(), type_dependent_values.begin(), + type_dependent_values.end()); + return values; } }; TYPED_TEST_SUITE(TestDecimalFromRealDouble, DecimalTypes); @@ -950,41 +1018,21 @@ TYPED_TEST(TestDecimalFromRealDouble, SuccessConversion) { } TYPED_TEST(TestDecimalFromRealDouble, LargeValues) { - // Test the entire double range - for (int32_t scale = -308; scale <= 308; ++scale) { + constexpr auto kMaxScale = TypeParam::kMaxScale; + for (int32_t scale = -kMaxScale; scale <= kMaxScale; ++scale) { double real = std::pow(10.0, static_cast(scale)); + ARROW_SCOPED_TRACE("scale = ", scale, ", real = ", real); CheckDecimalFromRealIntegerString(real, 1, -scale, "1"); } - for (int32_t scale = -307; scale <= 306; ++scale) { + for (int32_t scale = -kMaxScale + 1; scale <= kMaxScale - 1; ++scale) { double real = 123. * std::pow(10.0, static_cast(scale)); + ARROW_SCOPED_TRACE("scale = ", scale, ", real = ", real); CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12"); CheckDecimalFromRealIntegerString(real, 3, -scale, "123"); CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230"); } } -// Additional values that only apply to Decimal256 -TEST(TestDecimal256FromRealDouble, ExtremeValues) { - const std::vector values = { - // Almost 10**76 - FromDoubleTestParam{9.999999999999999e+75, 76, 0, - "999999999999999886366330070006442034959750906670402" - "8242075715752105414230016"}, - FromDoubleTestParam{-9.999999999999999e+75, 76, 0, - "-999999999999999886366330070006442034959750906670402" - "8242075715752105414230016"}, - FromDoubleTestParam{9.999999999999999e+65, 76, 10, - "999999999999999886366330070006442034959750906670402" - "824207571575210.5414230016"}, - FromDoubleTestParam{-9.999999999999999e+65, 76, 10, - "-999999999999999886366330070006442034959750906670402" - "824207571575210.5414230016"}}; - for (const auto& param : values) { 
- CheckDecimalFromReal(param.real, param.precision, param.scale, - param.expected); - } -} - template struct ToRealTestParam { std::string decimal_value; @@ -1124,10 +1172,11 @@ TYPED_TEST(TestDecimalToRealDouble, LargeValues) { // Note that exact comparisons would succeed on some platforms (Linux, macOS). // Nevertheless, power-of-ten factors are not all exactly representable // in binary floating point. - for (int32_t scale = -308; scale <= 308; scale++) { + constexpr auto kMaxScale = TypeParam::kMaxScale; + for (int32_t scale = -kMaxScale; scale <= kMaxScale; scale++) { CheckDecimalToRealApprox("1", scale, this->Pow10(-scale)); } - for (int32_t scale = -308; scale <= 306; scale++) { + for (int32_t scale = -kMaxScale; scale <= kMaxScale; scale++) { const double factor = 123.; CheckDecimalToRealApprox("123", scale, factor * this->Pow10(-scale)); } @@ -1797,7 +1846,7 @@ TEST(Decimal256Test, Multiply) { } } -TEST(Decimal256Test, Shift) { +TEST(Decimal256Test, LeftShift) { { // Values compared against python's implementation of shift. Decimal256 v(967); @@ -1845,6 +1894,91 @@ TEST(Decimal256Test, Shift) { } } +TEST(Decimal256Test, RightShift) { + // Values compared against python's implementation of shift. 
+ { + Decimal256 v(9876); + ASSERT_EQ(v >> 1, Decimal256(4938)); + ASSERT_EQ(v >> 3, Decimal256(1234)); + ASSERT_EQ(v >> 8, Decimal256(38)); + ASSERT_EQ(v >> 63, Decimal256(0)); + ASSERT_EQ(v >> 92, Decimal256(0)); + ASSERT_EQ(v >> 255, Decimal256(0)); + ASSERT_EQ(v >> 256, Decimal256(0)); + ASSERT_EQ(v >> 257, Decimal256(0)); + } + { + Decimal256 v(-9876); + ASSERT_EQ(v >> 1, Decimal256(-4938)); + ASSERT_EQ(v >> 3, Decimal256(-1235)); + ASSERT_EQ(v >> 8, Decimal256(-39)); + ASSERT_EQ(v >> 63, Decimal256(-1)); + ASSERT_EQ(v >> 92, Decimal256(-1)); + ASSERT_EQ(v >> 255, Decimal256(-1)); + ASSERT_EQ(v >> 256, Decimal256(-1)); + ASSERT_EQ(v >> 257, Decimal256(-1)); + } + { + Decimal256 v("98765643210987654321098765632109876543"); + ASSERT_EQ(v >> 1, Decimal256("49382821605493827160549382816054938271")); + ASSERT_EQ(v >> 63, Decimal256("10708192493628102678")); + ASSERT_EQ(v >> 64, Decimal256("5354096246814051339")); + ASSERT_EQ(v >> 65, Decimal256("2677048123407025669")); + ASSERT_EQ(v >> 120, Decimal256(74)); + ASSERT_EQ(v >> 127, Decimal256(0)); + ASSERT_EQ(v >> 128, Decimal256(0)); + ASSERT_EQ(v >> 255, Decimal256(0)); + } + { + Decimal256 v("-98765643210987654321098765632109876543"); + ASSERT_EQ(v >> 1, Decimal256("-49382821605493827160549382816054938272")); + ASSERT_EQ(v >> 63, Decimal256("-10708192493628102679")); + ASSERT_EQ(v >> 64, Decimal256("-5354096246814051340")); + ASSERT_EQ(v >> 65, Decimal256("-2677048123407025670")); + ASSERT_EQ(v >> 120, Decimal256(-75)); + ASSERT_EQ(v >> 127, Decimal256(-1)); + ASSERT_EQ(v >> 128, Decimal256(-1)); + ASSERT_EQ(v >> 255, Decimal256(-1)); + } + { + Decimal256 v( + "9876543210987654321098765432109876543210987654321098765432109876543210987654"); + ASSERT_EQ(v >> 1, Decimal256("4938271605493827160549382716054938271605493827160549382" + "716054938271605493827")); + ASSERT_EQ(v >> 62, + Decimal256("2141633921199954920344235701999924550815608299808123329980")); + ASSERT_EQ(v >> 64, + 
Decimal256("535408480299988730086058925499981137703902074952030832495")); + ASSERT_EQ(v >> 67, + Decimal256("66926060037498591260757365687497642212987759369003854061")); + ASSERT_EQ(v >> 128, Decimal256("29024551875420509196283165267081523552")); + ASSERT_EQ(v >> 129, Decimal256("14512275937710254598141582633540761776")); + ASSERT_EQ(v >> 240, Decimal256(5589)); + ASSERT_EQ(v >> 252, Decimal256(1)); + ASSERT_EQ(v >> 253, Decimal256(0)); + } + { + Decimal256 v( + "-9876543210987654321098765432109876543210987654321098765432109876543210987654"); + ASSERT_EQ(v >> 1, Decimal256("-493827160549382716054938271605493827160549382716054938" + "2716054938271605493827")); + ASSERT_EQ(v >> 62, + Decimal256("-2141633921199954920344235701999924550815608299808123329981")); + ASSERT_EQ(v >> 64, + Decimal256("-535408480299988730086058925499981137703902074952030832496")); + ASSERT_EQ(v >> 67, + Decimal256("-66926060037498591260757365687497642212987759369003854062")); + ASSERT_EQ(v >> 128, Decimal256("-29024551875420509196283165267081523553")); + ASSERT_EQ(v >> 129, Decimal256("-14512275937710254598141582633540761777")); + ASSERT_EQ(v >> 240, Decimal256(-5590)); + ASSERT_EQ(v >> 252, Decimal256(-2)); + ASSERT_EQ(v >> 253, Decimal256(-1)); + ASSERT_EQ(v >> 255, Decimal256(-1)); + ASSERT_EQ(v >> 256, Decimal256(-1)); + ASSERT_EQ(v >> 257, Decimal256(-1)); + } +} + TEST(Decimal256Test, Add) { EXPECT_EQ(Decimal256(103), Decimal256(100) + Decimal256(3)); EXPECT_EQ(Decimal256(203), Decimal256(200) + Decimal256(3)); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index f934edd3c3b..3a3abc1042f 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -15,10 +15,13 @@ # specific language governing permissions and limitations # under the License. 
+from collections import namedtuple import datetime +import decimal from functools import lru_cache, partial import inspect import itertools +import math import os import pickle import pytest @@ -1822,6 +1825,128 @@ def test_fsl_to_fsl_cast(value_type): fsl.cast(cast_type) +DecimalTypeTraits = namedtuple('DecimalTypeTraits', + ('name', 'factory', 'max_precision')) + +FloatToDecimalCase = namedtuple('FloatToDecimalCase', + ('precision', 'scale', 'float_val')) + +decimal_type_traits = [DecimalTypeTraits('decimal128', pa.decimal128, 38), + DecimalTypeTraits('decimal256', pa.decimal256, 76)] + + +def largest_scaled_float_not_above(val, scale): + """ + Find the largest float f such that `f * 10**scale <= val` + """ + assert val >= 0 + assert scale >= 0 + float_val = float(val) / 10**scale + if float_val * 10**scale > val: + # Take the float just below... it *should* satisfy + float_val = math.nextafter(float_val, 0.0) + if float_val * 10**scale > val: + float_val = math.nextafter(float_val, 0.0) + assert float_val * 10**scale <= val + return float_val + + +def scaled_float(int_val, scale): + """ + Return a float representation (possibly approximate) of `int_val * 10**-scale` + """ + assert isinstance(int_val, int) + unscaled = decimal.Decimal(int_val) + scaled = unscaled.scaleb(-scale) + float_val = float(scaled) + return float_val + + +def integral_float_to_decimal_cast_cases(float_ty, max_precision): + """ + Return FloatToDecimalCase instances with integral values.
+ """ + mantissa_digits = 16 + for precision in range(1, max_precision, 3): + for scale in range(0, precision, 2): + yield FloatToDecimalCase(precision, scale, 0.0) + yield FloatToDecimalCase(precision, scale, 1.0) + epsilon = 10**max(precision - mantissa_digits, scale) + abs_maxval = largest_scaled_float_not_above( + 10**precision - epsilon, scale) + yield FloatToDecimalCase(precision, scale, abs_maxval) + + +def real_float_to_decimal_cast_cases(float_ty, max_precision): + """ + Return FloatToDecimalCase instances with real values. + """ + mantissa_digits = 16 + for precision in range(1, max_precision, 3): + for scale in range(0, precision, 2): + epsilon = 2 * 10**max(precision - mantissa_digits, 0) + abs_minval = largest_scaled_float_not_above(epsilon, scale) + abs_maxval = largest_scaled_float_not_above( + 10**precision - epsilon, scale) + yield FloatToDecimalCase(precision, scale, abs_minval) + yield FloatToDecimalCase(precision, scale, abs_maxval) + + +def random_float_to_decimal_cast_cases(float_ty, max_precision): + """ + Return random-generated FloatToDecimalCase instances. 
+ """ + r = random.Random(42) + for precision in range(1, max_precision, 6): + for scale in range(0, precision, 4): + for i in range(20): + unscaled = r.randrange(0, 10**precision) + float_val = scaled_float(unscaled, scale) + assert float_val * 10**scale < 10**precision + yield FloatToDecimalCase(precision, scale, float_val) + + +def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx, + max_precision): + # Use the Python decimal module to build the expected result + # using the right precision + decimal_ctx.prec = decimal_ty.precision + decimal_ctx.rounding = decimal.ROUND_HALF_EVEN + expected = decimal_ctx.create_decimal_from_float(float_val) + # Round `expected` to `scale` digits after the decimal point + expected = expected.quantize(decimal.Decimal(1).scaleb(-decimal_ty.scale)) + s = pa.scalar(float_val, type=float_ty) + actual = pc.cast(s, decimal_ty).as_py() + if actual != expected: + # Allow the last digit to vary. The tolerance is higher for + # very high precisions as rounding errors can accumulate in + # the iterative algorithm (GH-35576). 
+ diff_digits = abs(actual - expected) * 10**decimal_ty.scale + limit = 2 if decimal_ty.precision < max_precision - 1 else 4 + assert diff_digits <= limit, ( + f"float_val = {float_val!r}, precision={decimal_ty.precision}, " + f"expected = {expected!r}, actual = {actual!r}, " + f"diff_digits = {diff_digits!r}") + + +# XXX Cannot test float32 as case generators above assume float64 +@pytest.mark.parametrize('float_ty', [pa.float64()], ids=str) +@pytest.mark.parametrize('decimal_ty', decimal_type_traits, + ids=lambda v: v.name) +@pytest.mark.parametrize('case_generator', + [integral_float_to_decimal_cast_cases, + real_float_to_decimal_cast_cases, + random_float_to_decimal_cast_cases], + ids=['integrals', 'reals', 'random']) +def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator): + with decimal.localcontext() as ctx: + for case in case_generator(float_ty, decimal_ty.max_precision): + check_cast_float_to_decimal( + float_ty, case.float_val, + decimal_ty.factory(case.precision, case.scale), + ctx, decimal_ty.max_precision) + + def test_strptime(): arr = pa.array(["5/1/2020", None, "12/13/1900"]) From daef12bbd0f20a20b6b5ffc04e02c9309e25f9d7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Jun 2023 18:25:21 +0200 Subject: [PATCH 2/4] Fix out of bounds read --- cpp/src/arrow/util/basic_decimal.cc | 2 +- cpp/src/arrow/util/decimal.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index d7affddb473..6b8bfdffcb5 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -864,7 +864,7 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) { } uint32_t in_word_shift = bits % 64; auto array_le = bit_util::little_endian::Make(&array_); - for (int i = kWordWidth; i >= cross_word_shift; i--) { + for (int i = kWordWidth - 1; i >= cross_word_shift; i--) { // Account for shifts larger then 64 bits array_le[i] = array_le[i 
- cross_word_shift]; array_le[i] <<= in_word_shift; diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 4368911b65e..d4cbdbda889 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -72,7 +72,7 @@ struct DecimalRealConversion : public BaseDecimalRealConversion { static constexpr int kMaxPrecision = DecimalType::kMaxPrecision; static constexpr int kMaxScale = DecimalType::kMaxScale; - static const auto DecimalPowerOfTen(int exp) { + static const auto& DecimalPowerOfTen(int exp) { DCHECK(exp >= 0 && exp <= kMaxPrecision); return DecimalTypeTraits::powers_of_ten()[exp]; } From 747dfa62a45c30b871ec34bcfc00cdcd7de98cfc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Jun 2023 18:54:14 +0200 Subject: [PATCH 3/4] Compatibility with older Pythons --- python/pyarrow/tests/test_compute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 3a3abc1042f..c84132d53b5 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1844,9 +1844,9 @@ def largest_scaled_float_not_above(val, scale): float_val = float(val) / 10**scale if float_val * 10**scale > val: # Take the float just below... 
it *should* satisfy - float_val = math.nextafter(float_val, 0.0) + float_val = np.nextafter(float_val, 0.0) if float_val * 10**scale > val: - float_val = math.nextafter(float_val, 0.0) + float_val = np.nextafter(float_val, 0.0) assert float_val * 10**scale <= val return float_val From 87be93afceeeabba03811d672e63465ce0e6875f Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 14 Jun 2023 14:46:16 +0200 Subject: [PATCH 4/4] Address review comments and add a test --- cpp/src/arrow/util/basic_decimal.cc | 10 ++--- cpp/src/arrow/util/basic_decimal.h | 2 +- cpp/src/arrow/util/decimal.cc | 19 ++++---- cpp/src/arrow/util/decimal_internal.h | 7 ++- python/pyarrow/tests/test_compute.py | 62 ++++++++++++++++++++++++++- 5 files changed, 83 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index 6b8bfdffcb5..f2fd39d6f37 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -858,13 +858,13 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) { return *this; } const int cross_word_shift = bits / 64; - if (cross_word_shift >= kWordWidth) { + if (cross_word_shift >= kNumWords) { array_ = {0, 0, 0, 0}; return *this; } uint32_t in_word_shift = bits % 64; auto array_le = bit_util::little_endian::Make(&array_); - for (int i = kWordWidth - 1; i >= cross_word_shift; i--) { + for (int i = kNumWords - 1; i >= cross_word_shift; i--) { // Account for shifts larger then 64 bits array_le[i] = array_le[i - cross_word_shift]; array_le[i] <<= in_word_shift; @@ -885,7 +885,7 @@ BasicDecimal256& BasicDecimal256::operator>>=(uint32_t bits) { const uint64_t extended = static_cast(static_cast(array_[kHighWordIndex]) >> 63); const int cross_word_shift = bits / 64; - if (cross_word_shift >= kWordWidth) { + if (cross_word_shift >= kNumWords) { array_.fill(extended); return *this; } @@ -895,10 +895,10 @@ BasicDecimal256& BasicDecimal256::operator>>=(uint32_t bits) { WordArray 
shifted_le; shifted_le.fill(extended); // Iterate from LSW to MSW - for (int i = cross_word_shift; i < kWordWidth; ++i) { + for (int i = cross_word_shift; i < kNumWords; ++i) { shifted_le[i - cross_word_shift] = array_le[i] >> in_word_shift; if (in_word_shift != 0) { - const uint64_t carry_bits = (i + 1 < kWordWidth ? array_le[i + 1] : extended) + const uint64_t carry_bits = (i + 1 < kNumWords ? array_le[i + 1] : extended) << (64 - in_word_shift); shifted_le[i - cross_word_shift] |= carry_bits; } diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index cc13d640c73..b263bb234a7 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -54,7 +54,7 @@ class ARROW_EXPORT GenericBasicDecimal { public: static constexpr int kBitWidth = BIT_WIDTH; static constexpr int kByteWidth = kBitWidth / 8; - static constexpr int kWordWidth = NWORDS; + static constexpr int kNumWords = NWORDS; // A constructor tag to introduce a little-endian encoded array static constexpr LittleEndianArrayTag LittleEndianArray{}; diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index d4cbdbda889..1f8447059f6 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -48,18 +48,20 @@ using internal::uint128_t; namespace { struct BaseDecimalRealConversion { - // Return 10**exp, with a fast lookup, assuming `exp` is withing bounds + // Return 10**exp, with a fast lookup, assuming `exp` is within bounds template static Real PowerOfTen(int32_t exp) { - DCHECK(exp >= -76 && exp <= 76); - return RealTraits::powers_of_ten()[exp + 76]; + constexpr int N = kPrecomputedPowersOfTen; + DCHECK(exp >= -N && exp <= N); + return RealTraits::powers_of_ten()[exp + N]; } // Return 10**exp, with a fast lookup if possible template static Real LargePowerOfTen(int32_t exp) { - if (ARROW_PREDICT_TRUE(exp >= -76 && exp <= 76)) { - return RealTraits::powers_of_ten()[exp + 76]; + constexpr int N = 
kPrecomputedPowersOfTen; + if (ARROW_PREDICT_TRUE(exp >= -N && exp <= N)) { + return RealTraits::powers_of_ten()[exp + N]; } else { return std::pow(static_cast(10), static_cast(exp)); } @@ -165,6 +167,7 @@ struct DecimalRealConversion : public BaseDecimalRealConversion { total_exp += exp; // The supplementary right shift required so that // `x * 10^total_exp / 2^total_shift` fits in the decimal. + DCHECK_LT(static_cast(total_exp), sizeof(kCeilLog2PowersOfTen)); const int bits = std::min(right_shift_by, kCeilLog2PowersOfTen[total_exp] - total_shift); total_shift += bits; @@ -891,7 +894,7 @@ struct Decimal256RealConversion return x; } const int cross_word_shift = bits / 64; - if (cross_word_shift >= Decimal256::kWordWidth) { + if (cross_word_shift >= Decimal256::kNumWords) { return Decimal256(); } const uint32_t in_word_shift = bits % 64; @@ -908,9 +911,9 @@ struct Decimal256RealConversion const uint64_t carry_bits = array_le[cross_word_shift] << (64 - in_word_shift); shifted_out = (shifted_out > 0) | (shifted_out >> in_word_shift) | carry_bits; } - for (int i = cross_word_shift; i < Decimal256::kWordWidth; ++i) { + for (int i = cross_word_shift; i < Decimal256::kNumWords; ++i) { shifted_le[i - cross_word_shift] = array_le[i] >> in_word_shift; - if (in_word_shift != 0 && i + 1 < Decimal256::kWordWidth) { + if (in_word_shift != 0 && i + 1 < Decimal256::kNumWords) { const uint64_t carry_bits = array_le[i + 1] << (64 - in_word_shift); shifted_le[i - cross_word_shift] |= carry_bits; } diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 31309dfc140..041aac4ef86 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -65,7 +65,9 @@ constexpr float kFloatInf = std::numeric_limits::infinity(); // >>> int(1e38) // 99999999999999997748809823456034029568 -constexpr float kFloatPowersOfTen[2 * 76 + 1] = { +constexpr int kPrecomputedPowersOfTen = 76; + +constexpr float kFloatPowersOfTen[2 * 
kPrecomputedPowersOfTen + 1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -89,7 +91,7 @@ constexpr float kFloatPowersOfTen[2 * 76 + 1] = { kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf}; -constexpr double kDoublePowersOfTen[2 * 76 + 1] = { +constexpr double kDoublePowersOfTen[2 * kPrecomputedPowersOfTen + 1] = { 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, @@ -427,6 +429,7 @@ constexpr BasicDecimal256 kDecimal256HalfPowersOfTen[] = { #undef BasicDecimal256FromLE +// ceil(log2(10 ^ k)) for k in [0...76] constexpr int kCeilLog2PowersOfTen[76 + 1] = { 0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40, 44, 47, 50, 54, 57, 60, 64, 67, 70, 74, 77, 80, 84, 87, 90, 94, 97, 100, 103, diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c84132d53b5..3e457259c78 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1929,7 +1929,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx, f"diff_digits = {diff_digits!r}") -# XXX Cannot test float32 as case generators above assume float64 +# Cannot test float32 as case generators above assume float64 @pytest.mark.parametrize('float_ty', [pa.float64()], ids=str) @pytest.mark.parametrize('decimal_ty', decimal_type_traits, ids=lambda v: v.name) @@ -1947,6 +1947,66 @@ def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator): ctx, decimal_ty.max_precision) +@pytest.mark.parametrize('float_ty', [pa.float32(), pa.float64()], ids=str) +@pytest.mark.parametrize('decimal_traits', decimal_type_traits, + ids=lambda v: v.name) +def test_cast_float_to_decimal_random(float_ty, decimal_traits): + """ + Test 
float-to-decimal conversion against exactly generated values. + """ + r = random.Random(43) + np_float_ty = { + pa.float32(): np.float32, + pa.float64(): np.float64, + }[float_ty] + mantissa_bits = { + pa.float32(): 24, + pa.float64(): 53, + }[float_ty] + float_exp_min, float_exp_max = { + pa.float32(): (-126, 127), + pa.float64(): (-1022, 1023), + }[float_ty] + mantissa_digits = math.floor(math.log10(2**mantissa_bits)) + max_precision = decimal_traits.max_precision + + with decimal.localcontext() as ctx: + precision = mantissa_digits + ctx.prec = precision + # The scale must be chosen so that + # 1) it's within bounds for the decimal type + # 2) the floating point exponent is within bounds + min_scale = max(-max_precision, + precision + math.ceil(math.log10(2**float_exp_min))) + max_scale = min(max_precision, + math.floor(math.log10(2**float_exp_max))) + for scale in range(min_scale, max_scale): + decimal_ty = decimal_traits.factory(precision, scale) + # We want to randomly generate a float from its mantissa bits + # and exponent, and compute the expected value in the + # decimal domain. The float exponent has to ensure the + # expected value doesn't overflow and doesn't lose precision. + float_exp = (-mantissa_bits + + math.floor(math.log2(10**(precision - scale)))) + assert float_exp_min <= float_exp <= float_exp_max + for i in range(5): + mantissa = r.randrange(0, 2**mantissa_bits) + float_val = np.ldexp(np_float_ty(mantissa), float_exp) + assert isinstance(float_val, np_float_ty) + # Make sure we compute the exact expected value and + # round by half-to-even when converting to the expected precision.
+ if float_exp >= 0: + expected = decimal.Decimal(mantissa) * 2**float_exp + else: + expected = decimal.Decimal(mantissa) / 2**-float_exp + expected_as_int = round(expected.scaleb(scale)) + actual = pc.cast( + pa.scalar(float_val, type=float_ty), decimal_ty).as_py() + actual_as_int = round(actual.scaleb(scale)) + # We allow for a minor rounding error between expected and actual + assert abs(actual_as_int - expected_as_int) <= 1 + + def test_strptime(): arr = pa.array(["5/1/2020", None, "12/13/1900"])