diff --git a/cpp/src/arrow/vendored/fast_float/README.md b/cpp/src/arrow/vendored/fast_float/README.md index 4e0728c69ec..d0b249e4973 100644 --- a/cpp/src/arrow/vendored/fast_float/README.md +++ b/cpp/src/arrow/vendored/fast_float/README.md @@ -1,9 +1,7 @@ The files in this directory are vendored from fast_float -git changeset `dc46ad4c606dc35cb63c947496a18ef8ab1e0f44`. +git changeset `70c9b7f884c7f80a9a0e06fa9754c0a2e6a9492e`. See https://github.com/lemire/fast_float Changes: -- fixed include paths -- disabled unused `print()` function - enclosed in `arrow_vendored` namespace. diff --git a/cpp/src/arrow/vendored/fast_float/ascii_number.h b/cpp/src/arrow/vendored/fast_float/ascii_number.h index d1f8af4087c..d41e6f46d02 100644 --- a/cpp/src/arrow/vendored/fast_float/ascii_number.h +++ b/cpp/src/arrow/vendored/fast_float/ascii_number.h @@ -11,40 +11,31 @@ namespace arrow_vendored { namespace fast_float { -fastfloat_really_inline bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. +fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); + ::memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { - uint64_t val; - memcpy(&val, chars, 8); +fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } -fastfloat_really_inline uint32_t parse_four_digits_unrolled(const char *chars) noexcept { - uint32_t val; - memcpy(&val, chars, sizeof(uint32_t)); - val = (val & 0x0F0F0F0F) * 2561 >> 8; - return (val & 0x00FF00FF) * 6553601 >> 16; -} - -fastfloat_really_inline bool is_made_of_four_digits_fast(const char *chars) noexcept { - uint32_t val; - memcpy(&val, chars, 4); - return (((val & 0xF0F0F0F0) | - (((val + 0x06060606) & 0xF0F0F0F0) >> 4)) == - 0x33333333); +fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { + uint64_t val; + ::memcpy(&val, chars, 8); + return is_made_of_eight_digits_fast(val); } struct parsed_number_string { @@ -57,7 +48,7 @@ struct parsed_number_string { }; -// Assuming that you use no more than 17 digits, this will +// Assuming that you use no more than 19 digits, this will // parse an ASCII string. fastfloat_really_inline parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept { @@ -81,13 +72,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - (*p - '0'); // might overflow, we will handle the overflow later + uint64_t(*p - '0'); // might overflow, we will handle the overflow later ++p; } int64_t exponent = 0; if ((p != pend) && (*p == '.')) { ++p; const char *first_after_period = p; +#if FASTFLOAT_IS_BIG_ENDIAN == 0 + // Fast approach only tested under little endian systems if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok p += 8; @@ -96,6 +89,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ p += 8; } } +#endif while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - '0'); ++p; @@ -110,9 +104,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ int32_t digit_count = int32_t(p - start_digits - 1); // used later to guard against overflows - - if ((p != pend) && (('e' == *p) || ('E' == *p))) { - if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; } + + if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { + const char * location_of_e = p; int64_t exp_number = 0; // exponential part ++p; bool neg_exp = false; @@ -123,18 +117,25 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ ++p; } if ((p == pend) || !is_integer(*p)) { - return answer; - } - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000) { - exp_number = 10 * exp_number + digit; + if(!(fmt & chars_format::fixed)) { + // We are in error. + return answer; } - ++p; + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); } - exponent += (neg_exp ? -exp_number : exp_number); } else { - if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } + // If it scientific and not fixed, we have to bail out. + if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } } answer.lastmatch = p; answer.valid = true; @@ -163,110 +164,35 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ return answer; } -// This should always succeed since it follows a call to parse_number_string. -// It assumes that there are more than 19 mantissa digits to parse. -parsed_number_string parse_truncated_decimal(const char *&p, const char *pend) noexcept { - parsed_number_string answer; - answer.valid = true; - answer.negative = (*p == '-'); - if ((*p == '-') || (*p == '+')) { - ++p; - } - size_t number_of_digits{0}; - - - uint64_t i = 0; - - while ((p != pend) && is_integer(*p)) { - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - if(number_of_digits < 19) { - - uint8_t digit = uint8_t(*p - '0'); - i = 10 * i + digit; - number_of_digits ++; - } - ++p; - } - int64_t exponent = 0; - if ((p != pend) && (*p == '.')) { - ++p; - const char *first_after_period = p; - - while ((p != pend) && is_integer(*p)) { - if(number_of_digits < 19) { - uint8_t digit = uint8_t(*p - '0'); - i = i * 10 + digit; - number_of_digits ++; - } else if (exponent == 0) { - exponent = first_after_period - p; - } - ++p; - } - } - - if ((p != pend) && (('e' == *p) || ('E' == *p))) { - int64_t exp_number = 0; // exponential part - ++p; - bool neg_exp = false; - if ((p != pend) && ('-' == *p)) { - neg_exp = true; - ++p; - } else if ((p != pend) && ('+' == *p)) { - ++p; - } - if ((p == pend) || !is_integer(*p)) { - return answer; - } - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000) { - exp_number = 10 * exp_number + digit; - } - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); - } - answer.lastmatch = p; - answer.valid = true; - answer.too_many_digits = true; // assumed - answer.exponent = exponent; - answer.mantissa = i; - return answer; -} - -// This should always succeed since it follows a call to parse_number_string. -decimal parse_decimal(const char *&p, const char *pend) noexcept { +// This should always succeed since it follows a call to parse_number_string +// This function could be optimized. In particular, we could stop after 19 digits +// and try to bail out. Furthermore, we should be able to recover the computed +// exponent from the pass in parse_number_string. +fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend) noexcept { decimal answer; answer.num_digits = 0; answer.decimal_point = 0; - answer.negative = false; answer.truncated = false; - // skip leading whitespace - while (fast_float::is_space(*p)) { - p++; - } + // any whitespace has been skipped. answer.negative = (*p == '-'); if ((*p == '-') || (*p == '+')) { ++p; } - + // skip leading zeroes while ((p != pend) && (*p == '0')) { ++p; } while ((p != pend) && is_integer(*p)) { - if (answer.num_digits + 1 < max_digits) { - answer.digits[answer.num_digits++] = uint8_t(*p - '0'); - } else { - answer.truncated = true; + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); } + answer.num_digits++; ++p; } - const char *first_after_period{}; if ((p != pend) && (*p == '.')) { ++p; - first_after_period = p; + const char *first_after_period = p; // if we have not yet encountered a zero, we have to skip it as well if(answer.num_digits == 0) { // skip zeros @@ -274,17 +200,29 @@ decimal parse_decimal(const char *&p, const char *pend) noexcept { ++p; } } +#if FASTFLOAT_IS_BIG_ENDIAN == 0 + // We expect that this loop will often take the bulk of the running time + // because when a value has lots of digits, these digits often + while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) { + uint64_t val; + ::memcpy(&val, p, sizeof(uint64_t)); + if(! is_made_of_eight_digits_fast(val)) { break; } + // We have eight digits, process them in one go! + val -= 0x3030303030303030; + ::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t)); + answer.num_digits += 8; + p += 8; + } +#endif while ((p != pend) && is_integer(*p)) { - if (answer.num_digits + 1 < max_digits) { - answer.digits[answer.num_digits++] = uint8_t(*p - '0'); - } else { - answer.truncated = true; + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); } + answer.num_digits++; ++p; } answer.decimal_point = int32_t(first_after_period - p); } - if ((p != pend) && (('e' == *p) || ('E' == *p))) { ++p; bool neg_exp = false; @@ -299,15 +237,23 @@ decimal parse_decimal(const char *&p, const char *pend) noexcept { uint8_t digit = uint8_t(*p - '0'); if (exp_number < 0x10000) { exp_number = 10 * exp_number + digit; - } + } ++p; } answer.decimal_point += (neg_exp ? -exp_number : exp_number); } - answer.decimal_point += answer.num_digits; + answer.decimal_point += int32_t(answer.num_digits); + if(answer.num_digits > max_digits) { + answer.truncated = true; + answer.num_digits = max_digits; + } + // In very rare cases, we may have fewer than 19 digits, we want to be able to reliably + // assume that all digits up to max_digit_without_overflow have been initialized. + for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; } + return answer; } } // namespace fast_float -} // namespace arrow_vendored +} // namespace arrow_vendored #endif diff --git a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h index a64a7aaca17..d2b5453ee66 100644 --- a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h +++ b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h @@ -15,24 +15,21 @@ namespace arrow_vendored { namespace fast_float { - - - // This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating // the result, with the "high" part corresponding to the most significant bits and the // low part corresponding to the least significant bits. -// +// template fastfloat_really_inline value128 compute_product_approximation(int64_t q, uint64_t w) { const int index = 2 * int(q - smallest_power_of_five); // For small values of q, e.g., q in [0,27], the answer is always exact because // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); - // gives the exact answer. + // gives the exact answer. value128 firstproduct = full_multiplication(w, power_of_five_128[index]); static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); - constexpr uint64_t precision_mask = (bit_precision < 64) ? - (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) + constexpr uint64_t precision_mask = (bit_precision < 64) ? + (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) : uint64_t(0xFFFFFFFFFFFFFFFF); if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. @@ -47,29 +44,35 @@ value128 compute_product_approximation(int64_t q, uint64_t w) { namespace { /** - * For q in (-400,350), we have that + * For q in (0,350), we have that * f = (((152170 + 65536) * q ) >> 16); * is equal to * floor(p) + q * where * p = log(5**q)/log(2) = q * log(5)/log(2) * + * For negative values of q in (-400,0), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * -ceil(p) + q + * where + * p = log(5**-q)/log(2) = -q * log(5)/log(2) */ - fastfloat_really_inline unsigned int power(int q) noexcept { + fastfloat_really_inline int power(int q) noexcept { return (((152170 + 65536) * q) >> 16) + 63; } } // namespace // w * 10 ** q // The returned value should be a valid ieee64 number that simply need to be packed. -// However, in some very rare cases, the computation will fail. In such cases, we +// However, in some very rare cases, the computation will fail. In such cases, we // return an adjusted_mantissa with a negative power of 2: the caller should recompute // in such cases. template fastfloat_really_inline adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { adjusted_mantissa answer; - if ((w == 0) || (q < smallest_power_of_five) ){ + if ((w == 0) || (q < smallest_power_of_five)) { answer.power2 = 0; answer.mantissa = 0; // result should be zero @@ -95,19 +98,18 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { if(product.low == 0xFFFFFFFFFFFFFFFF) { // could guard it further // In some very rare cases, this could happen, in which case we might need a more accurate // computation that what we can provide cheaply. This is very, very unlikely. - answer.power2 = -1; + answer.power2 = -1; // This (a negative value) indicates an error condition. return answer; } // The "compute_product_approximation" function can be slightly slower than a branchless approach: // value128 product = compute_product(q, w); // but in practice, we can win big with the compute_product_approximation if its additional branch // is easily predicted. Which is best is data specific. - uint64_t upperbit = product.high >> 63; + int upperbit = int(product.high >> 63); answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); - lz += int(1 ^ upperbit); - answer.power2 = power(int(q)) - lz - binary::minimum_exponent() + 1; + answer.power2 = int(power(int(q)) + upperbit - lz - binary::minimum_exponent()); if (answer.power2 <= 0) { // we have a subnormal? // Here have that answer.power2 <= 0 so -answer.power2 >= 0 if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. @@ -115,8 +117,8 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { answer.mantissa = 0; // result should be zero return answer; - } - // next line is safe because -answer.power2 + 1 < 0 + } + // next line is safe because -answer.power2 + 1 < 64 answer.mantissa >>= -answer.power2 + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. @@ -136,13 +138,13 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { // usually, we round *up*, but if we fall right in between and and we have an // even basis, we need to round down // We are only concerned with the cases where 5**q fits in single 64-bit word. - if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && + if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && ((answer.mantissa & 3) == 1) ) { // we may fall between two floats! // To be in-between two floats we need that in doing // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); // ... we dropped out only zeroes. But if this happened, then we can go back!!! if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) { - answer.mantissa &= ~1; // flip it so that we do not round up + answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up } } @@ -161,7 +163,8 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { return answer; } + } // namespace fast_float -} // namespace arrow_vendored +} // namespace arrow_vendored #endif diff --git a/cpp/src/arrow/vendored/fast_float/fast_float.h b/cpp/src/arrow/vendored/fast_float/fast_float.h index 0e7acf5f84d..3e39cac90cf 100644 --- a/cpp/src/arrow/vendored/fast_float/fast_float.h +++ b/cpp/src/arrow/vendored/fast_float/fast_float.h @@ -20,19 +20,19 @@ struct from_chars_result { /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting - * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. - * The resulting floating-point value is the closest floating-point values (using either float or double), + * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. + * The resulting floating-point value is the closest floating-point values (using either float or double), * using the "round to even" convention for values that would otherwise fall right in-between two values. * That is, we provide exact parsing according to the IEEE standard. - * + * * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored. - * + * * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`). - * + * * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of - * the type `fast_float::chars_format`. It is a bitset value: we check whether + * the type `fast_float::chars_format`. It is a bitset value: we check whether * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set * to determine whether we allowe the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. @@ -42,6 +42,7 @@ from_chars_result from_chars(const char *first, const char *last, T &value, chars_format fmt = chars_format::general) noexcept; } -} // namespace arrow_vendored +} // namespace arrow_vendored + #include "parse_number.h" #endif // FASTFLOAT_FAST_FLOAT_H diff --git a/cpp/src/arrow/vendored/fast_float/fast_table.h b/cpp/src/arrow/vendored/fast_float/fast_table.h index ac34fe7cefc..97f722a3648 100644 --- a/cpp/src/arrow/vendored/fast_float/fast_table.h +++ b/cpp/src/arrow/vendored/fast_float/fast_table.h @@ -20,18 +20,18 @@ namespace fast_float { * The smallest non-zero float (binary64) is 2^−1074. * We take as input numbers of the form w x 10^q where w < 2^64. * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. - * However, we have that + * However, we have that * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^−1074. - * Thus it is possible for a number of the form w * 10^-342 where + * Thus it is possible for a number of the form w * 10^-342 where * w is a 64-bit value to be a non-zero floating-point number. ********* - * Any number of form w * 10^309 where w>= 1 is going to be + * Any number of form w * 10^309 where w>= 1 is going to be * infinite in binary64 so we never need to worry about powers * of 5 greater than 308. */ constexpr int smallest_power_of_five = -342; constexpr int largest_power_of_five = 308; -// truncated powers of five from 5^-344 all the way to 5^308 +// Powers of five from 5^-342 all the way to 5^308 rounded toward one. const uint64_t power_of_five_128[]= { 0xeef453d6923bd65a,0x113faa2906a13b3f, 0x9558b4661b6565f8,0x4ac7ca59a424c507, @@ -348,16 +348,16 @@ const uint64_t power_of_five_128[]= { 0xa2425ff75e14fc31,0xa1258379a94d028d, 0xcad2f7f5359a3b3e,0x96ee45813a04330, 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, - 0x9e74d1b791e07e48,0x775ea264cf55347e, - 0xc612062576589dda,0x95364afe032a81a0, - 0xf79687aed3eec551,0x3a83ddbd83f52210, - 0x9abe14cd44753b52,0xc4926a9672793580, - 0xc16d9a0095928a27,0x75b7053c0f178400, - 0xf1c90080baf72cb1,0x5324c68b12dd6800, - 0x971da05074da7bee,0xd3f6fc16ebca8000, - 0xbce5086492111aea,0x88f4bb1ca6bd0000, - 0xec1e4a7db69561a5,0x2b31e9e3d0700000, - 0x9392ee8e921d5d07,0x3aff322e62600000, + 0x9e74d1b791e07e48,0x775ea264cf55347d, + 0xc612062576589dda,0x95364afe032a819d, + 0xf79687aed3eec551,0x3a83ddbd83f52204, + 0x9abe14cd44753b52,0xc4926a9672793542, + 0xc16d9a0095928a27,0x75b7053c0f178293, + 0xf1c90080baf72cb1,0x5324c68b12dd6338, + 0x971da05074da7bee,0xd3f6fc16ebca5e03, + 0xbce5086492111aea,0x88f4bb1ca6bcf584, + 0xec1e4a7db69561a5,0x2b31e9e3d06c32e5, + 0x9392ee8e921d5d07,0x3aff322e62439fcf, 0xb877aa3236a4b449,0x9befeb9fad487c3, 0xe69594bec44de15b,0x4c2ebe687989a9b4, 0x901d7cf73ab0acd9,0xf9d37014bf60a11, @@ -686,6 +686,6 @@ const uint64_t power_of_five_128[]= { 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; } -} // namespace arrow_vendored +} // namespace arrow_vendored #endif diff --git a/cpp/src/arrow/vendored/fast_float/float_common.h b/cpp/src/arrow/vendored/fast_float/float_common.h index 4d82e8769c2..6127fe69492 100644 --- a/cpp/src/arrow/vendored/fast_float/float_common.h +++ b/cpp/src/arrow/vendored/fast_float/float_common.h @@ -3,47 +3,101 @@ #include #include -#ifndef _WIN32 -// strcasecmp, strncasecmp -#include +#include + +#if (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ + || defined(__arm__) \ + || defined(__MINGW32__)) +#define FASTFLOAT_32BIT +#elif (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \ + || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ + || defined(__MINGW64__) \ + || defined(__s390x__) \ + || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__))) +#define FASTFLOAT_64BIT +#else +#error Unknown platform #endif -#ifdef _MSC_VER -#define fastfloat_really_inline __forceinline -#else -#define fastfloat_really_inline inline __attribute__((always_inline)) -#endif +#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define FASTFLOAT_VISUAL_STUDIO 1 +#endif #ifdef _WIN32 -#define fastfloat_strcasecmp _stricmp -#define fastfloat_strncasecmp _strnicmp +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include #else -#define fastfloat_strcasecmp strcasecmp -#define fastfloat_strncasecmp strncasecmp +#include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#define FASTFLOAT_IS_BIG_ENDIAN 1 #endif -namespace arrow_vendored { -namespace fast_float { -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif +#ifdef FASTFLOAT_VISUAL_STUDIO +#define fastfloat_really_inline __forceinline +#else +#define fastfloat_really_inline inline __attribute__((always_inline)) +#endif +namespace arrow_vendored { +namespace fast_float { +// Compares two ASCII strings in a case insensitive manner. +inline bool fastfloat_strncasecmp(const char *input1, const char *input2, + size_t length) { + char running_diff{0}; + for (size_t i = 0; i < length; i++) { + running_diff |= (input1[i] ^ input2[i]); + } + return (running_diff == 0) || (running_diff == 32); +} +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif - -bool is_space(uint8_t c) { - static const bool table[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return table[c]; +inline bool is_space(uint8_t c) { + static const bool table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return table[c]; } namespace { constexpr uint32_t max_digits = 768; - +constexpr uint32_t max_digit_without_overflow = 19; constexpr int32_t decimal_point_range = 2047; } // namespace - struct value128 { uint64_t low; uint64_t high; @@ -51,35 +105,44 @@ struct value128 { value128() : low(0), high(0) {} }; - /* result might be undefined when input_num is zero */ -fastfloat_really_inline -int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER +fastfloat_really_inline int leading_zeroes(uint64_t input_num) { + assert(input_num > 0); +#ifdef FASTFLOAT_VISUAL_STUDIO + #if defined(_M_X64) || defined(_M_ARM64) unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; + _BitScanReverse64(&leading_zero, input_num); + return (int)(63 - leading_zero); + #else + int last_bit = 0; + if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32; + if(input_num & uint64_t( 0xffff0000)) input_num >>= 16, last_bit |= 16; + if(input_num & uint64_t( 0xff00)) input_num >>= 8, last_bit |= 8; + if(input_num & uint64_t( 0xf0)) input_num >>= 4, last_bit |= 4; + if(input_num & uint64_t( 0xc)) input_num >>= 2, last_bit |= 2; + if(input_num & uint64_t( 0x2)) input_num >>= 1, last_bit |= 1; + return 63 - last_bit; + #endif #else return __builtin_clzll(input_num); #endif } +#ifdef FASTFLOAT_32BIT -#if defined(_WIN32) && !defined(__clang__) -// Note MinGW falls here too -#include - -#if !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm -// this is a slow emulation routine for 32-bit Windows -// +#if (!defined(_WIN32)) || defined(__MINGW32__) +// slow emulation routine for 32-bit fastfloat_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; + return x * (uint64_t)y; } -fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +#endif + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, + uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -89,56 +152,103 @@ fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi (adbc_carry << 32) + !!(lo < bd); return lo; } -#endif +#endif // !__MINGW64__ -fastfloat_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 - return answer; -} +#endif // FASTFLOAT_32BIT -#else -// compute value1 * value2 -fastfloat_really_inline -value128 full_multiplication(uint64_t value1, uint64_t value2) { +// compute 64-bit a*b +fastfloat_really_inline value128 full_multiplication(uint64_t a, + uint64_t b) { value128 answer; - __uint128_t r = ((__uint128_t)value1) * value2; +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(a, b); + answer.low = a * b; +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64)) + answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 +#elif defined(FASTFLOAT_64BIT) + __uint128_t r = ((__uint128_t)a) * b; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); +#else + #error Not implemented +#endif return answer; } -#endif struct adjusted_mantissa { - uint64_t mantissa; - int power2; - adjusted_mantissa() : mantissa(0), power2(0) {} + uint64_t mantissa{0}; + int power2{0}; // a negative value indicate an invalid result + adjusted_mantissa() = default; + // bool operator==(const adjusted_mantissa &o) const = default; + bool operator==(const adjusted_mantissa &o) const { + return mantissa == o.mantissa && power2 == o.power2; + } }; struct decimal { - uint32_t num_digits; - int32_t decimal_point; - bool negative; - bool truncated; + uint32_t num_digits{0}; + int32_t decimal_point{0}; + bool negative{false}; + bool truncated{false}; uint8_t digits[max_digits]; + decimal() = default; + // Copies are not allowed since this is a fat object. + decimal(const decimal &) = delete; + // Copies are not allowed since this is a fat object. + decimal &operator=(const decimal &) = delete; + // Moves are allowed: + decimal(decimal &&) = default; + decimal &operator=(decimal &&other) = default; + // Generates a mantissa by truncating to 19 digits. + // This function should be reasonably fast. + // Note that the user is responsible to ensure that digits are + // initialized to zero when there are fewer than 19. + inline uint64_t to_truncated_mantissa() { +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + uint64_t mantissa = 0; + for (uint32_t i = 0; i < max_digit_without_overflow; + i++) { + mantissa = mantissa * 10 + digits[i]; // can be accelerated + } + return mantissa; +#else + uint64_t val; + // 8 first digits + ::memcpy(&val, digits, sizeof(uint64_t)); + val = val * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + uint64_t mantissa = + uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + // 8 more digits for a total of 16 + ::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t)); + val = val * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + uint32_t eight_digits_value = + uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + mantissa = 100000000 * mantissa + eight_digits_value; + for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow; + i++) { + mantissa = mantissa * 10 + digits[i]; // can be accelerated + } + return mantissa; +#endif + } + // Generate san exponent matching to_truncated_mantissa() + inline int32_t to_truncated_exponent() { + return decimal_point - int32_t(max_digit_without_overflow); + } }; constexpr static double powers_of_ten_double[] = { 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; -constexpr static float powers_of_ten_float[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}; +constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, + 1e6, 1e7, 1e8, 1e9, 1e10}; -template -struct binary_format { +template struct binary_format { static constexpr int mantissa_explicit_bits(); static constexpr int minimum_exponent(); static constexpr int infinite_power(); @@ -151,73 +261,54 @@ struct binary_format { static constexpr T exact_power_of_ten(int64_t power); }; -template <> -constexpr int binary_format::mantissa_explicit_bits() { +template <> constexpr int binary_format::mantissa_explicit_bits() { return 52; } -template <> -constexpr int binary_format::mantissa_explicit_bits() { +template <> constexpr int binary_format::mantissa_explicit_bits() { return 23; } -template <> -constexpr int binary_format::max_exponent_round_to_even() { +template <> constexpr int binary_format::max_exponent_round_to_even() { return 23; } -template <> -constexpr int binary_format::max_exponent_round_to_even() { +template <> constexpr int binary_format::max_exponent_round_to_even() { return 10; } - -template <> -constexpr int binary_format::min_exponent_round_to_even() { +template <> constexpr int binary_format::min_exponent_round_to_even() { return -4; } -template <> -constexpr int binary_format::min_exponent_round_to_even() { +template <> constexpr int binary_format::min_exponent_round_to_even() { return -17; } -template <> -constexpr int binary_format::minimum_exponent() { +template <> constexpr int binary_format::minimum_exponent() { return -1023; } -template <> -constexpr int binary_format::minimum_exponent() { +template <> constexpr int binary_format::minimum_exponent() { return -127; } -template <> -constexpr int binary_format::infinite_power() { - return 0x7FF; +template <> constexpr int binary_format::infinite_power() { + return 0x7FF; } -template <> -constexpr int binary_format::infinite_power() { +template <> constexpr int binary_format::infinite_power() { return 0xFF; } -template <> -constexpr int binary_format::sign_index() { - return 63; -} -template <> -constexpr int binary_format::sign_index() { - return 31; -} +template <> constexpr int binary_format::sign_index() { return 63; } +template <> constexpr int binary_format::sign_index() { return 31; } -template <> -constexpr int binary_format::min_exponent_fast_path() { +template <> constexpr int binary_format::min_exponent_fast_path() { #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) return 0; #else return -22; #endif } -template <> -constexpr int binary_format::min_exponent_fast_path() { +template <> constexpr int binary_format::min_exponent_fast_path() { #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) return 0; #else @@ -225,28 +316,22 @@ constexpr int binary_format::min_exponent_fast_path() { #endif } - -template <> -constexpr int binary_format::max_exponent_fast_path() { +template <> constexpr int binary_format::max_exponent_fast_path() { return 22; } -template <> -constexpr int binary_format::max_exponent_fast_path() { +template <> constexpr int binary_format::max_exponent_fast_path() { return 10; } - -template <> -constexpr uint64_t binary_format::max_mantissa_fast_path() { +template <> constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } -template <> -constexpr uint64_t binary_format::max_mantissa_fast_path() { +template <> constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } template <> -constexpr double binary_format::exact_power_of_ten(int64_t power) { +constexpr double binary_format::exact_power_of_ten(int64_t power) { return powers_of_ten_double[power]; } template <> @@ -255,9 +340,18 @@ constexpr float binary_format::exact_power_of_ten(int64_t power) { return powers_of_ten_float[power]; } - +// for convenience: +#include +inline std::ostream &operator<<(std::ostream &out, const fast_float::decimal &d) { + out << "0."; + for (size_t i = 0; i < d.num_digits; i++) { + out << int32_t(d.digits[i]); + } + out << " * 10 ** " << d.decimal_point; + return out; +} } // namespace fast_float -} // namespace arrow_vendored +} // namespace arrow_vendored #endif diff --git a/cpp/src/arrow/vendored/fast_float/parse_number.h b/cpp/src/arrow/vendored/fast_float/parse_number.h index f27a6d82f91..a570a5cbfbc 100644 --- a/cpp/src/arrow/vendored/fast_float/parse_number.h +++ b/cpp/src/arrow/vendored/fast_float/parse_number.h @@ -2,7 +2,7 @@ #define FASTFLOAT_PARSE_NUMBER_H #include "ascii_number.h" #include "decimal_to_binary.h" -#include "thompson_tao.h" +#include "simple_decimal_conversion.h" #include #include @@ -17,7 +17,7 @@ namespace fast_float { namespace { /** * Special case +inf, -inf, nan, infinity, -infinity. - * The case comparisons could be made much faster given that we know that the + * The case comparisons could be made much faster given that we know that the * strings a null-free and fixed. **/ template @@ -25,14 +25,13 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n from_chars_result answer; answer.ec = std::errc(); // be optimistic if (last - first >= 3) { - if (fastfloat_strncasecmp(first, "nan", 3) == 0) { + if (fastfloat_strncasecmp(first, "nan", 3)) { answer.ptr = first + 3; value = std::numeric_limits::quiet_NaN(); return answer; } - if (fastfloat_strncasecmp(first, "inf", 3) == 0) { - - if ((last - first >= 8) && (fastfloat_strncasecmp(first, "infinity", 8) == 0)) { + if (fastfloat_strncasecmp(first, "inf", 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first, "infinity", 8)) { answer.ptr = first + 8; } else { answer.ptr = first + 3; @@ -41,7 +40,7 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } if (last - first >= 4) { - if ((fastfloat_strncasecmp(first, "+nan", 4) == 0) || (fastfloat_strncasecmp(first, "-nan", 4) == 0)) { + if (fastfloat_strncasecmp(first, "+nan", 4) || fastfloat_strncasecmp(first, "-nan", 4)) { answer.ptr = first + 4; value = std::numeric_limits::quiet_NaN(); if (first[0] == '-') { @@ -50,8 +49,8 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } - if ((fastfloat_strncasecmp(first, "+inf", 4) == 0) || (fastfloat_strncasecmp(first, "-inf", 4) == 0)) { - if ((last - first >= 8) && (fastfloat_strncasecmp(first + 1, "infinity", 8) == 0)) { + if (fastfloat_strncasecmp(first, "+inf", 4) || fastfloat_strncasecmp(first, "-inf", 4)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 1, "infinity", 8)) { answer.ptr = first + 9; } else { answer.ptr = first + 4; @@ -65,6 +64,7 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n } } answer.ec = std::errc::invalid_argument; + answer.ptr = first; return answer; } } // namespace @@ -78,7 +78,7 @@ from_chars_result from_chars(const char *first, const char *last, from_chars_result answer; - while ((first != last) && fast_float::is_space(*first)) { + while ((first != last) && fast_float::is_space(uint8_t(*first))) { first++; } if (first == last) { @@ -95,24 +95,33 @@ from_chars_result from_chars(const char *first, const char *last, if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path()) { value = T(pns.mantissa); - if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } else { value = value * binary_format::exact_power_of_ten(pns.exponent); } if (pns.negative) { value = -value; } return answer; } adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa>(first,last) : compute_float>(pns.exponent, pns.mantissa); - if(am.power2 < 0) { - am = parse_long_mantissa>(first,last); - } + // If we called compute_float>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0), + // then we need to go the long way around again. This is very uncommon. + if(am.power2 < 0) { am = parse_long_mantissa>(first,last); } uint64_t word = am.mantissa; word |= uint64_t(am.power2) << binary_format::mantissa_explicit_bits(); - word = pns.negative + word = pns.negative ? word | (uint64_t(1) << binary_format::sign_index()) : word; - memcpy(&value, &word, sizeof(T)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + if (std::is_same::value) { + ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian + } else { + ::memcpy(&value, &word, sizeof(T)); + } +#else + // For little-endian systems: + ::memcpy(&value, &word, sizeof(T)); +#endif return answer; } } // namespace fast_float -} // namespace arrow_vendored +} // namespace arrow_vendored #endif diff --git a/cpp/src/arrow/vendored/fast_float/thompson_tao.h b/cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h similarity index 91% rename from cpp/src/arrow/vendored/fast_float/thompson_tao.h rename to cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h index c9ec1870c99..2dc3ee4d380 100644 --- a/cpp/src/arrow/vendored/fast_float/thompson_tao.h +++ b/cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h @@ -3,14 +3,15 @@ /** * This code is meant to handle the case where we have more than 19 digits. - * - * Based on work by Nigel Tao (at https://github.com/google/wuffs/) + * + * It is based on work by Nigel Tao (at https://github.com/google/wuffs/) * who credits Ken Thompson for the design (via a reference to the Go source - * code). See - * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c - * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c + * code). + * + * Rob Pike suggested that this algorithm be called "Simple Decimal Conversion". + * * It is probably not very fast but it is a fallback that should almost never - * be used in reallife. + * be used in real life. Though it is not fast, it is "easily" understood and debugged. **/ #include "ascii_number.h" #include "decimal_to_binary.h" @@ -28,22 +29,9 @@ inline void trim(decimal &h) { } } -#if 0 -/** If you ever want to see what is going on, the following function might prove handy: - * **/ -void print(const decimal d, int32_t exp2 = 0) { - printf("0."); - for(size_t i = 0; i < d.num_digits; i++) { - printf("%d", int(d.digits[i])); - } - printf(" * 10 **%d ", d.decimal_point); - printf(" * 2 **%d ", exp2); -} -#endif - -uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { +uint32_t number_of_digits_decimal_left_shift(const decimal &h, uint32_t shift) { shift &= 63; const static uint16_t number_of_digits_decimal_left_shift_table[65] = { 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, @@ -136,8 +124,6 @@ uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { return num_new_digits; } -} // end of anonymous namespace - uint64_t round(decimal &h) { if ((h.num_digits == 0) || (h.decimal_point < 0)) { return 0; @@ -152,7 +138,7 @@ uint64_t round(decimal &h) { } bool round_up = false; if (dp < h.num_digits) { - round_up = h.digits[dp] >= 5; // normally, we round up + round_up = h.digits[dp] >= 5; // normally, we round up // but we may need to round to even! if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); @@ -253,6 +239,7 @@ void decimal_right_shift(decimal &h, uint32_t shift) { trim(h); } +} // end of anonymous namespace template adjusted_mantissa compute_float(decimal &d) { @@ -266,21 +253,21 @@ adjusted_mantissa compute_float(decimal &d) { // At this point, going further, we can assume that d.num_digits > 0. // // We want to guard against excessive decimal point values because - // they can result in long running times. Indeed, we do + // they can result in long running times. Indeed, we do // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not // fine (runs for a long time). // if(d.decimal_point < -324) { // We have something smaller than 1e-324 which is always zero - // in binary64 and binary32. + // in binary64 and binary32. // It should be zero. answer.power2 = 0; answer.mantissa = 0; return answer; } else if(d.decimal_point >= 310) { // We have something at least as large as 0.1e310 which is - // always infinite. + // always infinite. answer.power2 = binary::infinite_power(); answer.mantissa = 0; return answer; @@ -367,9 +354,20 @@ adjusted_mantissa compute_float(decimal &d) { template adjusted_mantissa parse_long_mantissa(const char *first, const char* last) { decimal d = parse_decimal(first, last); + // In some cases we can get lucky and looking at only the first 19 digits is enough. + // Let us try that. + const uint64_t mantissa = d.to_truncated_mantissa(); + const int64_t exponent = d.to_truncated_exponent(); + // credit: R. Oudompheng who first implemented this fast path (to my knowledge). + // It is rough, but it does the job of accelerating the slow path since most + // long streams of digits are determined after 19 digits. + adjusted_mantissa am1 = compute_float(exponent, mantissa); + adjusted_mantissa am2 = compute_float(exponent, mantissa+1); + // They must both agree and be both a successful result. + if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; } return compute_float(d); } } // namespace fast_float -} // namespace arrow_vendored +} // namespace arrow_vendored #endif