Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions cpp/src/arrow/vendored/fast_float/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
The files in this directory are vendored from fast_float
git changeset `dc46ad4c606dc35cb63c947496a18ef8ab1e0f44`.
git changeset `70c9b7f884c7f80a9a0e06fa9754c0a2e6a9492e`.

See https://github.com/lemire/fast_float

Changes:
- fixed include paths
- disabled unused `print()` function
- enclosed in `arrow_vendored` namespace.
198 changes: 72 additions & 126 deletions cpp/src/arrow/vendored/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,40 +11,31 @@
namespace arrow_vendored {
namespace fast_float {

fastfloat_really_inline bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); }
// Next function can be micro-optimized, but compilers are entirely
// able to optimize it well.
fastfloat_really_inline bool is_integer(char c) noexcept {
  // True exactly when c is one of the characters '0' through '9'.
  return !(c < '0' || c > '9');
}


// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
// Converts the eight characters starting at `chars` into the integer they
// spell, using three multiply-and-shift steps on a single 64-bit word.
// The arithmetic treats the lowest byte of the loaded word as the most
// significant digit, so the first character has to land in the lowest byte
// (the little-endian arrangement).
// The function does not validate its input; the caller visible in this file
// checks is_made_of_eight_digits_fast() before calling it.
fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
uint64_t val;
memcpy(&val, chars, sizeof(uint64_t));
::memcpy(&val, chars, sizeof(uint64_t));
// Keep the low nibble of every byte (the digit value), then merge each pair
// of adjacent bytes into a two-digit value: 2561 == 10 * 2^8 + 1.
val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
// Merge pairs of two-digit values into four-digit values: 6553601 == 100 * 2^16 + 1.
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
// Merge the two four-digit values into the final result: 42949672960001 == 10000 * 2^32 + 1.
return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
}

fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
uint64_t val;
memcpy(&val, chars, 8);
fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
  // A byte passes only if its high nibble is 3 both as stored and after 6 is
  // added to it, i.e. the byte lies in 0x30..0x39. All eight bytes of the word
  // are checked at once.
  const uint64_t high_nibbles_as_is = val & 0xF0F0F0F0F0F0F0F0;
  const uint64_t high_nibbles_plus_six =
      ((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;
  return (high_nibbles_as_is | high_nibbles_plus_six) == 0x3333333333333333;
}


fastfloat_really_inline uint32_t parse_four_digits_unrolled(const char *chars) noexcept {
  // Load the four characters as one 32-bit word.
  uint32_t word;
  memcpy(&word, chars, sizeof(word));
  // Keep the low nibble of each byte, then merge adjacent bytes into
  // two-digit values: 2561 == 10 * 2^8 + 1.
  const uint32_t digit_pairs = ((word & 0x0F0F0F0F) * 2561) >> 8;
  // Merge the two two-digit values: 6553601 == 100 * 2^16 + 1.
  return ((digit_pairs & 0x00FF00FF) * 6553601) >> 16;
}

fastfloat_really_inline bool is_made_of_four_digits_fast(const char *chars) noexcept {
uint32_t val;
memcpy(&val, chars, 4);
return (((val & 0xF0F0F0F0) |
(((val + 0x06060606) & 0xF0F0F0F0) >> 4)) ==
0x33333333);
fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept {
uint64_t val;
::memcpy(&val, chars, 8);
return is_made_of_eight_digits_fast(val);
}

struct parsed_number_string {
Expand All @@ -57,7 +48,7 @@ struct parsed_number_string {
};


// Assuming that you use no more than 17 digits, this will
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
fastfloat_really_inline
parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
Expand All @@ -81,13 +72,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
(*p - '0'); // might overflow, we will handle the overflow later
uint64_t(*p - '0'); // might overflow, we will handle the overflow later
++p;
}
int64_t exponent = 0;
if ((p != pend) && (*p == '.')) {
++p;
const char *first_after_period = p;
#if FASTFLOAT_IS_BIG_ENDIAN == 0
// Fast approach only tested under little endian systems
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
p += 8;
Expand All @@ -96,6 +89,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
p += 8;
}
}
#endif
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
++p;
Expand All @@ -110,9 +104,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_

int32_t digit_count =
int32_t(p - start_digits - 1); // used later to guard against overflows
if ((p != pend) && (('e' == *p) || ('E' == *p))) {
if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; }

if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
const char * location_of_e = p;
int64_t exp_number = 0; // exponential part
++p;
bool neg_exp = false;
Expand All @@ -123,18 +117,25 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
++p;
}
if ((p == pend) || !is_integer(*p)) {
return answer;
}
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
if (exp_number < 0x10000) {
exp_number = 10 * exp_number + digit;
if(!(fmt & chars_format::fixed)) {
// We are in error.
return answer;
}
++p;
// Otherwise, we will be ignoring the 'e'.
p = location_of_e;
} else {
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
if (exp_number < 0x10000) {
exp_number = 10 * exp_number + digit;
}
++p;
}
exponent += (neg_exp ? -exp_number : exp_number);
}
exponent += (neg_exp ? -exp_number : exp_number);
} else {
if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
// If the format is scientific and not fixed, we have to bail out.
if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
}
answer.lastmatch = p;
answer.valid = true;
Expand Down Expand Up @@ -163,128 +164,65 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
return answer;
}

// This should always succeed since it follows a call to parse_number_string.
// It assumes that there are more than 19 mantissa digits to parse.
// Scans sign, integer digits, optional fraction and optional exponent, keeping
// at most 19 digits in the 64-bit mantissa; further digits are consumed but
// not accumulated.
// `p` is taken by reference and is left pointing past everything consumed.
// On the normal exit path the result has valid and too_many_digits set to
// true, plus lastmatch, exponent and mantissa filled in.
// NOTE(review): *p is read before any comparison against pend, so the caller
// is expected to pass a non-empty range — confirm.
parsed_number_string parse_truncated_decimal(const char *&p, const char *pend) noexcept {
parsed_number_string answer;
answer.valid = true;
answer.negative = (*p == '-');
// Step over an optional leading sign.
if ((*p == '-') || (*p == '+')) {
++p;
}
// Counts every digit accumulated into `i`, leading zeros included.
size_t number_of_digits{0};


uint64_t i = 0;

// Integer part: accumulate while the 19-digit budget lasts, but keep
// consuming digits afterwards.
while ((p != pend) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
if(number_of_digits < 19) {

uint8_t digit = uint8_t(*p - '0');
i = 10 * i + digit;
number_of_digits ++;
}
++p;
}
// NOTE(review): integer-part digits dropped above are not reflected in
// `exponent` — confirm whether callers compensate.
int64_t exponent = 0;
if ((p != pend) && (*p == '.')) {
++p;
const char *first_after_period = p;

while ((p != pend) && is_integer(*p)) {
if(number_of_digits < 19) {
uint8_t digit = uint8_t(*p - '0');
i = i * 10 + digit;
number_of_digits ++;
} else if (exponent == 0) {
// First fractional digit that no longer fits: record minus the number of
// fractional digits that were kept.
// NOTE(review): if the budget is already exhausted at the very first
// fractional digit this difference is 0, so the branch runs again on the
// next digit and records -1 — confirm that is intended.
exponent = first_after_period - p;
}
++p;
}
}

if ((p != pend) && (('e' == *p) || ('E' == *p))) {
int64_t exp_number = 0; // exponential part
++p;
bool neg_exp = false;
if ((p != pend) && ('-' == *p)) {
neg_exp = true;
++p;
} else if ((p != pend) && ('+' == *p)) {
++p;
}
// No digit after 'e'/'E' (and optional sign): return early. Only valid and
// negative have been assigned by this function at this point.
if ((p == pend) || !is_integer(*p)) {
return answer;
}
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
// Stop growing once the value reaches 0x10000 so exp_number cannot overflow;
// remaining digits are still consumed.
if (exp_number < 0x10000) {
exp_number = 10 * exp_number + digit;
}
++p;
}
exponent += (neg_exp ? -exp_number : exp_number);
}
answer.lastmatch = p;
answer.valid = true;
answer.too_many_digits = true; // assumed
answer.exponent = exponent;
answer.mantissa = i;
return answer;
}


// This should always succeed since it follows a call to parse_number_string.
decimal parse_decimal(const char *&p, const char *pend) noexcept {
// This should always succeed since it follows a call to parse_number_string.
// This function could be optimized. In particular, we could stop after 19 digits
// and try to bail out. Furthermore, we should be able to recover the computed
// exponent from the pass in parse_number_string.
fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend) noexcept {
decimal answer;
answer.num_digits = 0;
answer.decimal_point = 0;
answer.negative = false;
answer.truncated = false;
// skip leading whitespace
while (fast_float::is_space(*p)) {
p++;
}
// any whitespace has been skipped.
answer.negative = (*p == '-');
if ((*p == '-') || (*p == '+')) {
++p;
}

// skip leading zeroes
while ((p != pend) && (*p == '0')) {
++p;
}
while ((p != pend) && is_integer(*p)) {
if (answer.num_digits + 1 < max_digits) {
answer.digits[answer.num_digits++] = uint8_t(*p - '0');
} else {
answer.truncated = true;
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
answer.num_digits++;
++p;
}
const char *first_after_period{};
if ((p != pend) && (*p == '.')) {
++p;
first_after_period = p;
const char *first_after_period = p;
// If no digit has been stored yet, the zeros right after the period are still leading zeros and must be skipped as well.
if(answer.num_digits == 0) {
// skip zeros
while ((p != pend) && (*p == '0')) {
++p;
}
}
#if FASTFLOAT_IS_BIG_ENDIAN == 0
// We expect that this loop will often take the bulk of the running time
// when a value has lots of digits after the decimal point.
while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) {
uint64_t val;
::memcpy(&val, p, sizeof(uint64_t));
if(! is_made_of_eight_digits_fast(val)) { break; }
// We have eight digits, process them in one go!
val -= 0x3030303030303030;
::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t));
answer.num_digits += 8;
p += 8;
}
#endif
while ((p != pend) && is_integer(*p)) {
if (answer.num_digits + 1 < max_digits) {
answer.digits[answer.num_digits++] = uint8_t(*p - '0');
} else {
answer.truncated = true;
if (answer.num_digits < max_digits) {
answer.digits[answer.num_digits] = uint8_t(*p - '0');
}
answer.num_digits++;
++p;
}
answer.decimal_point = int32_t(first_after_period - p);
}

if ((p != pend) && (('e' == *p) || ('E' == *p))) {
++p;
bool neg_exp = false;
Expand All @@ -299,15 +237,23 @@ decimal parse_decimal(const char *&p, const char *pend) noexcept {
uint8_t digit = uint8_t(*p - '0');
if (exp_number < 0x10000) {
exp_number = 10 * exp_number + digit;
}
}
++p;
}
answer.decimal_point += (neg_exp ? -exp_number : exp_number);
}
answer.decimal_point += answer.num_digits;
answer.decimal_point += int32_t(answer.num_digits);
if(answer.num_digits > max_digits) {
answer.truncated = true;
answer.num_digits = max_digits;
}
// In very rare cases we may have fewer than 19 digits; we want to be able to reliably
// assume that all digits up to max_digit_without_overflow have been initialized.
for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; }

return answer;
}
} // namespace fast_float
} // namespace arrow_vendored
} // namespace arrow_vendored

#endif
Loading