diff --git a/cpp/src/arrow/vendored/fast_float/README.md b/cpp/src/arrow/vendored/fast_float/README.md
index 4e0728c69ec..d0b249e4973 100644
--- a/cpp/src/arrow/vendored/fast_float/README.md
+++ b/cpp/src/arrow/vendored/fast_float/README.md
@@ -1,9 +1,7 @@
 The files in this directory are vendored from fast_float
-git changeset `dc46ad4c606dc35cb63c947496a18ef8ab1e0f44`.
+git changeset `70c9b7f884c7f80a9a0e06fa9754c0a2e6a9492e`.
 
 See https://github.com/lemire/fast_float
 
 Changes:
-- fixed include paths
-- disabled unused `print()` function
 - enclosed in `arrow_vendored` namespace.
diff --git a/cpp/src/arrow/vendored/fast_float/ascii_number.h b/cpp/src/arrow/vendored/fast_float/ascii_number.h
index d1f8af4087c..d41e6f46d02 100644
--- a/cpp/src/arrow/vendored/fast_float/ascii_number.h
+++ b/cpp/src/arrow/vendored/fast_float/ascii_number.h
@@ -11,40 +11,31 @@
 namespace arrow_vendored {
 namespace fast_float {
 
-fastfloat_really_inline bool is_integer(char c)  noexcept  { return (c >= '0' && c <= '9'); }
+// Next function can be micro-optimized, but compilers are entirely
+// able to optimize it well.
+fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
 
 
 // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
 fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
   uint64_t val;
-  memcpy(&val, chars, sizeof(uint64_t));
+  ::memcpy(&val, chars, sizeof(uint64_t));
   val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
   val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
   return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
 }
 
-fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
-  uint64_t val;
-  memcpy(&val, chars, 8);
+fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
   return (((val & 0xF0F0F0F0F0F0F0F0) |
            (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
           0x3333333333333333);
 }
 
 
-fastfloat_really_inline uint32_t parse_four_digits_unrolled(const char *chars)  noexcept  {
-  uint32_t val;
-  memcpy(&val, chars, sizeof(uint32_t));
-  val = (val & 0x0F0F0F0F) * 2561 >> 8;
-  return (val & 0x00FF00FF) * 6553601 >> 16;
-}
-
-fastfloat_really_inline bool is_made_of_four_digits_fast(const char *chars)  noexcept  {
-  uint32_t val;
-  memcpy(&val, chars, 4);
-  return (((val & 0xF0F0F0F0) |
-           (((val + 0x06060606) & 0xF0F0F0F0) >> 4)) ==
-          0x33333333);
+fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
+  uint64_t val;
+  ::memcpy(&val, chars, 8);
+  return is_made_of_eight_digits_fast(val);
 }
 
 struct parsed_number_string {
@@ -57,7 +48,7 @@ struct parsed_number_string {
 };
 
 
-// Assuming that you use no more than 17 digits, this will
+// Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
 fastfloat_really_inline
 parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
@@ -81,13 +72,15 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
     // a multiplication by 10 is cheaper than an arbitrary integer
     // multiplication
     i = 10 * i +
-        (*p - '0'); // might overflow, we will handle the overflow later
+        uint64_t(*p - '0'); // might overflow, we will handle the overflow later
     ++p;
   }
   int64_t exponent = 0;
   if ((p != pend) && (*p == '.')) {
     ++p;
     const char *first_after_period = p;
+#if FASTFLOAT_IS_BIG_ENDIAN == 0
+    // Fast approach only tested under little endian systems
     if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
       i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
       p += 8;
@@ -96,6 +89,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
         p += 8;
       }
     }
+#endif
     while ((p != pend) && is_integer(*p)) {
       uint8_t digit = uint8_t(*p - '0');
       ++p;
@@ -110,9 +104,9 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
 
   int32_t digit_count =
       int32_t(p - start_digits - 1); // used later to guard against overflows
-  
-  if ((p != pend) && (('e' == *p) || ('E' == *p))) {
-    if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; } 
+ 
+  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
+    const char * location_of_e = p;
     int64_t exp_number = 0;            // exponential part
     ++p;
     bool neg_exp = false;
@@ -123,18 +117,25 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
       ++p;
     }
     if ((p == pend) || !is_integer(*p)) {
-      return answer;
-    }
-    while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - '0');
-      if (exp_number < 0x10000) {
-        exp_number = 10 * exp_number + digit;
+      if(!(fmt & chars_format::fixed)) {
+        // We are in error.
+        return answer;
       }
-      ++p;
+      // Otherwise, we will be ignoring the 'e'.
+      p = location_of_e;
+    } else {
+      while ((p != pend) && is_integer(*p)) {
+        uint8_t digit = uint8_t(*p - '0');
+        if (exp_number < 0x10000) {
+          exp_number = 10 * exp_number + digit;
+        }
+        ++p;
+      }
+      exponent += (neg_exp ? -exp_number : exp_number);
     }
-    exponent += (neg_exp ? -exp_number : exp_number);
   } else {
-    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } 
+    // If the format is scientific and not fixed, we have to bail out.
+    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
   }
   answer.lastmatch = p;
   answer.valid = true;
@@ -163,110 +164,35 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
   return answer;
 }
 
-// This should always succeed since it follows a call to parse_number_string.
-// It assumes that there are more than 19 mantissa digits to parse.
-parsed_number_string parse_truncated_decimal(const char *&p, const char *pend)  noexcept  {
-  parsed_number_string answer;
-  answer.valid = true;
-  answer.negative = (*p == '-');
-  if ((*p == '-') || (*p == '+')) {
-    ++p;
-  }
-  size_t number_of_digits{0};
-
-
-  uint64_t i = 0; 
-
-  while ((p != pend) && is_integer(*p)) {
-    // a multiplication by 10 is cheaper than an arbitrary integer
-    // multiplication
-    if(number_of_digits < 19) {
-
-      uint8_t digit = uint8_t(*p - '0');
-      i = 10 * i + digit;
-      number_of_digits ++;
-    }
-    ++p;
-  }
-  int64_t exponent = 0;
-  if ((p != pend) && (*p == '.')) {
-    ++p;
-    const char *first_after_period = p;
-   
-    while ((p != pend) && is_integer(*p)) {
-      if(number_of_digits < 19) {
-        uint8_t digit = uint8_t(*p - '0');
-        i = i * 10 + digit;
-        number_of_digits ++;
-      } else if (exponent == 0) {
-        exponent = first_after_period - p;
-      }
-      ++p;
-    }
-  }
-
-  if ((p != pend) && (('e' == *p) || ('E' == *p))) {
-    int64_t exp_number = 0;            // exponential part
-    ++p;
-    bool neg_exp = false;
-    if ((p != pend) && ('-' == *p)) {
-      neg_exp = true;
-      ++p;
-    } else if ((p != pend) && ('+' == *p)) {
-      ++p;
-    }
-    if ((p == pend) || !is_integer(*p)) {
-      return answer;
-    }
-    while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - '0');
-      if (exp_number < 0x10000) {
-        exp_number = 10 * exp_number + digit;
-      }
-      ++p;
-    }
-    exponent += (neg_exp ? -exp_number : exp_number);
-  } 
-  answer.lastmatch = p;
-  answer.valid = true;
-  answer.too_many_digits = true; // assumed
-  answer.exponent = exponent;
-  answer.mantissa = i;
-  return answer;
-}
-
 
-// This should always succeed since it follows a call to parse_number_string.
-decimal parse_decimal(const char *&p, const char *pend)  noexcept  {
+// This should always succeed since it follows a call to parse_number_string.
+// This function could be optimized. In particular, we could stop after 19 digits
+// and try to bail out. Furthermore, we should be able to recover the computed
+// exponent from the pass in parse_number_string.
+fastfloat_really_inline decimal parse_decimal(const char *p, const char *pend) noexcept {
   decimal answer;
   answer.num_digits = 0;
   answer.decimal_point = 0;
-  answer.negative = false;
   answer.truncated = false;
-  // skip leading whitespace
-  while (fast_float::is_space(*p)) {
-    p++;
-  }
+  // any whitespace has been skipped.
   answer.negative = (*p == '-');
   if ((*p == '-') || (*p == '+')) {
     ++p;
   }
-
+  // skip leading zeroes
   while ((p != pend) && (*p == '0')) {
     ++p;
   }
   while ((p != pend) && is_integer(*p)) {
-    if (answer.num_digits + 1 < max_digits) {
-      answer.digits[answer.num_digits++] = uint8_t(*p - '0');
-    } else {
-      answer.truncated = true;
+    if (answer.num_digits < max_digits) {
+      answer.digits[answer.num_digits] = uint8_t(*p - '0');
     }
+    answer.num_digits++;
     ++p;
   }
-  const char *first_after_period{};
   if ((p != pend) && (*p == '.')) {
     ++p;
-    first_after_period = p;
+    const char *first_after_period = p;
     // if we have not yet encountered a zero, we have to skip it as well
     if(answer.num_digits == 0) {
       // skip zeros
@@ -274,17 +200,29 @@ decimal parse_decimal(const char *&p, const char *pend)  noexcept  {
        ++p;
       }
     }
+#if FASTFLOAT_IS_BIG_ENDIAN == 0
+    // We expect that this loop will often take the bulk of the running time
+    // because when a value has lots of digits, most of them usually follow the decimal point.
+    while ((p + 8 <= pend) && (answer.num_digits + 8 < max_digits)) {
+      uint64_t val;
+      ::memcpy(&val, p, sizeof(uint64_t));
+      if(! is_made_of_eight_digits_fast(val)) { break; }
+      // We have eight digits, process them in one go!
+      val -= 0x3030303030303030;
+      ::memcpy(answer.digits + answer.num_digits, &val, sizeof(uint64_t));
+      answer.num_digits += 8;
+      p += 8;
+    }
+#endif
     while ((p != pend) && is_integer(*p)) {
-      if (answer.num_digits + 1 < max_digits) {
-        answer.digits[answer.num_digits++] = uint8_t(*p - '0');
-      } else {
-        answer.truncated = true;
+      if (answer.num_digits < max_digits) {
+        answer.digits[answer.num_digits] = uint8_t(*p - '0');
       }
+      answer.num_digits++;
       ++p;
     }
     answer.decimal_point = int32_t(first_after_period - p);
   }
-  
   if ((p != pend) && (('e' == *p) || ('E' == *p))) {
     ++p;
     bool neg_exp = false;
@@ -299,15 +237,23 @@ decimal parse_decimal(const char *&p, const char *pend)  noexcept  {
       uint8_t digit = uint8_t(*p - '0');
       if (exp_number < 0x10000) {
         exp_number = 10 * exp_number + digit;
-      }      
+      }    
       ++p;
     }
     answer.decimal_point += (neg_exp ? -exp_number : exp_number);
   }
-  answer.decimal_point += answer.num_digits;
+  answer.decimal_point += int32_t(answer.num_digits);
+  if(answer.num_digits > max_digits) {
+    answer.truncated = true;
+    answer.num_digits = max_digits;
+  }
+  // In very rare cases, we may have fewer than 19 digits; we want to be able to reliably
+  // assume that all digits up to max_digit_without_overflow have been initialized.
+  for(uint32_t i = answer.num_digits; i < max_digit_without_overflow; i++) { answer.digits[i] = 0; }
+
   return answer;
 }
 } // namespace fast_float
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 
 #endif
diff --git a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
index a64a7aaca17..d2b5453ee66 100644
--- a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
+++ b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
@@ -15,24 +15,21 @@
 namespace arrow_vendored {
 namespace fast_float {
 
-
-
-
 // This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating
 // the result, with the "high" part corresponding to the most significant bits and the
 // low part corresponding to the least significant bits.
-// 
+//
 template <int bit_precision>
 fastfloat_really_inline
 value128 compute_product_approximation(int64_t q, uint64_t w) {
   const int index = 2 * int(q - smallest_power_of_five);
   // For small values of q, e.g., q in [0,27], the answer is always exact because
   // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]);
-  // gives the exact answer. 
+  // gives the exact answer.
   value128 firstproduct = full_multiplication(w, power_of_five_128[index]);
   static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should  be in (0,64]");
-  constexpr uint64_t precision_mask = (bit_precision < 64) ? 
-               (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) 
+  constexpr uint64_t precision_mask = (bit_precision < 64) ?
+               (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
                : uint64_t(0xFFFFFFFFFFFFFFFF);
   if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with  (lower + w < lower)
     // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed.
@@ -47,29 +44,35 @@ value128 compute_product_approximation(int64_t q, uint64_t w) {
 
 namespace {
 /**
- * For q in (-400,350), we have that
+ * For q in (0,350), we have that
  *  f = (((152170 + 65536) * q ) >> 16);
  * is equal to
  *   floor(p) + q
  * where
  *   p = log(5**q)/log(2) = q * log(5)/log(2)
  *
+ * For negative values of q in (-400,0), we have that
+ *  f = (((152170 + 65536) * q ) >> 16);
+ * is equal to
+ *   -ceil(p) + q
+ * where
+ *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
  */
-  fastfloat_really_inline unsigned int power(int q)  noexcept  {
+  fastfloat_really_inline int power(int q)  noexcept  {
     return (((152170 + 65536) * q) >> 16) + 63;
   }
 } // namespace
 
 // w * 10 ** q
 // The returned value should be a valid ieee64 number that simply need to be packed.
-// However, in some very rare cases, the computation will fail. In such cases, we 
+// However, in some very rare cases, the computation will fail. In such cases, we
 // return an adjusted_mantissa with a negative power of 2: the caller should recompute
 // in such cases.
 template <typename binary>
 fastfloat_really_inline
 adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   adjusted_mantissa answer;
-  if ((w == 0) || (q < smallest_power_of_five) ){
+  if ((w == 0) || (q < smallest_power_of_five)) {
     answer.power2 = 0;
     answer.mantissa = 0;
     // result should be zero
@@ -95,19 +98,18 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   if(product.low == 0xFFFFFFFFFFFFFFFF) { //  could guard it further
     // In some very rare cases, this could happen, in which case we might need a more accurate
     // computation that what we can provide cheaply. This is very, very unlikely.
-    answer.power2 = -1;
+    answer.power2 = -1; // This (a negative value) indicates an error condition.
     return answer;
   }
   // The "compute_product_approximation" function can be slightly slower than a branchless approach:
   // value128 product = compute_product(q, w);
   // but in practice, we can win big with the compute_product_approximation if its additional branch
   // is easily predicted. Which is best is data specific.
-  uint64_t upperbit = product.high >> 63;
+  int upperbit = int(product.high >> 63);
 
   answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
-  lz += int(1 ^ upperbit);
-  answer.power2 = power(int(q)) - lz - binary::minimum_exponent() + 1;
 
+  answer.power2 = int(power(int(q)) + upperbit - lz - binary::minimum_exponent());
   if (answer.power2 <= 0) { // we have a subnormal?
     // Here have that answer.power2 <= 0 so -answer.power2 >= 0
     if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
@@ -115,8 +117,8 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
       answer.mantissa = 0;
       // result should be zero
       return answer;
-    } 
-    // next line is safe because -answer.power2 + 1 < 0
+    }
+    // next line is safe because -answer.power2 + 1 < 64
     answer.mantissa >>= -answer.power2 + 1;
     // Thankfully, we can't have both "round-to-even" and subnormals because
     // "round-to-even" only occurs for powers close to 0.
@@ -136,13 +138,13 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   // usually, we round *up*, but if we fall right in between and and we have an
   // even basis, we need to round down
   // We are only concerned with the cases where 5**q fits in single 64-bit word.
-  if ((product.low <= 1) &&  (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && 
+  if ((product.low <= 1) &&  (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) &&
       ((answer.mantissa & 3) == 1) ) { // we may fall between two floats!
     // To be in-between two floats we need that in doing
     //   answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
     // ... we dropped out only zeroes. But if this happened, then we can go back!!!
     if((answer.mantissa  << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) ==  product.high) {
-      answer.mantissa &= ~1;          // flip it so that we do not round up
+      answer.mantissa &= ~uint64_t(1);          // flip it so that we do not round up
     }
   }
 
@@ -161,7 +163,8 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   return answer;
 }
 
+
 } // namespace fast_float
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 
 #endif
diff --git a/cpp/src/arrow/vendored/fast_float/fast_float.h b/cpp/src/arrow/vendored/fast_float/fast_float.h
index 0e7acf5f84d..3e39cac90cf 100644
--- a/cpp/src/arrow/vendored/fast_float/fast_float.h
+++ b/cpp/src/arrow/vendored/fast_float/fast_float.h
@@ -20,19 +20,19 @@ struct from_chars_result {
 
 /**
  * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
- * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. 
- * The resulting floating-point value is the closest floating-point values (using either float or double), 
+ * a locale-independent format equivalent to what is used by std::strtod in the default ("C") locale.
+ * The resulting floating-point value is the closest floating-point value (using either float or double),
  * using the "round to even" convention for values that would otherwise fall right in-between two values.
  * That is, we provide exact parsing according to the IEEE standard.
- * 
+ *
  * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the
  * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned
  * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored.
- * 
+ *
  * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`).
- * 
+ *
  * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of
- * the type `fast_float::chars_format`. It is a bitset value: we check whether 
+ * the type `fast_float::chars_format`. It is a bitset value: we check whether
  * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set
  * to determine whether we allowe the fixed point and scientific notation respectively.
  * The default is  `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
@@ -42,6 +42,7 @@ from_chars_result from_chars(const char *first, const char *last,
                              T &value, chars_format fmt = chars_format::general)  noexcept;
 
 }
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
+
 #include "parse_number.h"
 #endif // FASTFLOAT_FAST_FLOAT_H
diff --git a/cpp/src/arrow/vendored/fast_float/fast_table.h b/cpp/src/arrow/vendored/fast_float/fast_table.h
index ac34fe7cefc..97f722a3648 100644
--- a/cpp/src/arrow/vendored/fast_float/fast_table.h
+++ b/cpp/src/arrow/vendored/fast_float/fast_table.h
@@ -20,18 +20,18 @@ namespace fast_float {
  * The smallest non-zero float (binary64) is 2^−1074.
  * We take as input numbers of the form w x 10^q where w < 2^64.
  * We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
- * However, we have that 
+ * However, we have that
  * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^−1074.
- * Thus it is possible for a number of the form w * 10^-342 where 
+ * Thus it is possible for a number of the form w * 10^-342 where
  * w is a 64-bit value to be a non-zero floating-point number.
  *********
- * Any number of form w * 10^309 where w>= 1 is going to be 
+ * Any number of form w * 10^309 where w>= 1 is going to be
  * infinite in binary64 so we never need to worry about powers
  * of 5 greater than 308.
  */
 constexpr int smallest_power_of_five = -342;
 constexpr int largest_power_of_five = 308;
-// truncated powers of five from 5^-344 all the way to 5^308
+// Powers of five from 5^-342 all the way to 5^308 rounded toward one.
 const uint64_t power_of_five_128[]= {
         0xeef453d6923bd65a,0x113faa2906a13b3f,
         0x9558b4661b6565f8,0x4ac7ca59a424c507,
@@ -348,16 +348,16 @@ const uint64_t power_of_five_128[]= {
         0xa2425ff75e14fc31,0xa1258379a94d028d,
         0xcad2f7f5359a3b3e,0x96ee45813a04330,
         0xfd87b5f28300ca0d,0x8bca9d6e188853fc,
-        0x9e74d1b791e07e48,0x775ea264cf55347e,
-        0xc612062576589dda,0x95364afe032a81a0,
-        0xf79687aed3eec551,0x3a83ddbd83f52210,
-        0x9abe14cd44753b52,0xc4926a9672793580,
-        0xc16d9a0095928a27,0x75b7053c0f178400,
-        0xf1c90080baf72cb1,0x5324c68b12dd6800,
-        0x971da05074da7bee,0xd3f6fc16ebca8000,
-        0xbce5086492111aea,0x88f4bb1ca6bd0000,
-        0xec1e4a7db69561a5,0x2b31e9e3d0700000,
-        0x9392ee8e921d5d07,0x3aff322e62600000,
+        0x9e74d1b791e07e48,0x775ea264cf55347d,
+        0xc612062576589dda,0x95364afe032a819d,
+        0xf79687aed3eec551,0x3a83ddbd83f52204,
+        0x9abe14cd44753b52,0xc4926a9672793542,
+        0xc16d9a0095928a27,0x75b7053c0f178293,
+        0xf1c90080baf72cb1,0x5324c68b12dd6338,
+        0x971da05074da7bee,0xd3f6fc16ebca5e03,
+        0xbce5086492111aea,0x88f4bb1ca6bcf584,
+        0xec1e4a7db69561a5,0x2b31e9e3d06c32e5,
+        0x9392ee8e921d5d07,0x3aff322e62439fcf,
         0xb877aa3236a4b449,0x9befeb9fad487c3,
         0xe69594bec44de15b,0x4c2ebe687989a9b4,
         0x901d7cf73ab0acd9,0xf9d37014bf60a11,
@@ -686,6 +686,6 @@ const uint64_t power_of_five_128[]= {
         0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,};
 
 }
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 
 #endif
diff --git a/cpp/src/arrow/vendored/fast_float/float_common.h b/cpp/src/arrow/vendored/fast_float/float_common.h
index 4d82e8769c2..6127fe69492 100644
--- a/cpp/src/arrow/vendored/fast_float/float_common.h
+++ b/cpp/src/arrow/vendored/fast_float/float_common.h
@@ -3,47 +3,101 @@
 
 #include <cfloat>
 #include <cstdint>
-#ifndef _WIN32
-// strcasecmp, strncasecmp 
-#include <strings.h>
+#include <cassert>
+
+#if (defined(__i386) || defined(__i386__) || defined(_M_IX86)   \
+     || defined(__arm__)                                        \
+     || defined(__MINGW32__))
+#define FASTFLOAT_32BIT
+#elif (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)   \
+       || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
+       || defined(__MINGW64__)                                          \
+       || defined(__s390x__)                                            \
+       || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)))
+#define FASTFLOAT_64BIT
+#else
+#error Unknown platform
 #endif
 
-#ifdef _MSC_VER
-#define fastfloat_really_inline __forceinline
-#else
-#define fastfloat_really_inline inline __attribute__((always_inline))
-#endif 
+#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__))
+#include <intrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FASTFLOAT_VISUAL_STUDIO 1
+#endif
 
 #ifdef _WIN32
-#define fastfloat_strcasecmp _stricmp
-#define fastfloat_strncasecmp _strnicmp
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h>
 #else
-#define fastfloat_strcasecmp strcasecmp
-#define fastfloat_strncasecmp strncasecmp
+#include <endian.h>
+#endif
+#
+#ifndef __BYTE_ORDER__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#ifndef __ORDER_LITTLE_ENDIAN__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#define FASTFLOAT_IS_BIG_ENDIAN 1
 #endif
-namespace arrow_vendored {
-namespace fast_float {
-#ifndef FLT_EVAL_METHOD
-#error "FLT_EVAL_METHOD should be defined, please include cfloat."
 #endif
 
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#define fastfloat_really_inline __forceinline
+#else
+#define fastfloat_really_inline inline __attribute__((always_inline))
+#endif
 
+namespace arrow_vendored {
+namespace fast_float {
 
+// Compares two ASCII strings case-insensitively; true when they match (bytes may differ only in bit 0x20).
+inline bool fastfloat_strncasecmp(const char *input1, const char *input2,
+                                  size_t length) {
+  char running_diff{0};
+  for (size_t i = 0; i < length; i++) {
+    running_diff |= (input1[i] ^ input2[i]);
+  }
+  return (running_diff == 0) || (running_diff == 32);
+}
 
+#ifndef FLT_EVAL_METHOD
+#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif
 
-
-bool is_space(uint8_t c) {
-    static const bool table[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    return table[c];
+inline bool is_space(uint8_t c) {
+  static const bool table[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  return table[c];
 }
 
 namespace {
 constexpr uint32_t max_digits = 768;
-
+constexpr uint32_t max_digit_without_overflow = 19;
 constexpr int32_t decimal_point_range = 2047;
 } // namespace
 
-
 struct value128 {
   uint64_t low;
   uint64_t high;
@@ -51,35 +105,44 @@ struct value128 {
   value128() : low(0), high(0) {}
 };
 
-
 /* result might be undefined when input_num is zero */
-fastfloat_really_inline 
-int leading_zeroes(uint64_t input_num) {
-#ifdef _MSC_VER
+fastfloat_really_inline int leading_zeroes(uint64_t input_num) {
+  assert(input_num > 0);
+#ifdef FASTFLOAT_VISUAL_STUDIO
+  #if defined(_M_X64) || defined(_M_ARM64)
   unsigned long leading_zero = 0;
   // Search the mask data from most significant bit (MSB)
   // to least significant bit (LSB) for a set bit (1).
-  if (_BitScanReverse64(&leading_zero, input_num))
-    return (int)(63 - leading_zero);
-  else
-    return 64;
+  _BitScanReverse64(&leading_zero, input_num);
+  return (int)(63 - leading_zero);
+  #else
+  int last_bit = 0;
+  if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32;
+  if(input_num & uint64_t(        0xffff0000)) input_num >>= 16, last_bit |= 16;
+  if(input_num & uint64_t(            0xff00)) input_num >>=  8, last_bit |=  8;
+  if(input_num & uint64_t(              0xf0)) input_num >>=  4, last_bit |=  4;
+  if(input_num & uint64_t(               0xc)) input_num >>=  2, last_bit |=  2;
+  if(input_num & uint64_t(               0x2)) input_num >>=  1, last_bit |=  1;
+  return 63 - last_bit;
+  #endif
 #else
   return __builtin_clzll(input_num);
 #endif
 }
 
+#ifdef FASTFLOAT_32BIT
 
-#if defined(_WIN32) && !defined(__clang__)
-// Note MinGW falls here too
-#include <intrin.h>
-
-#if !defined(_M_X64) && !defined(_M_ARM64)// _umul128 for x86, arm
-// this is a slow emulation routine for 32-bit Windows
-//
+#if (!defined(_WIN32)) || defined(__MINGW32__)
+// slow emulation routine for 32-bit
 fastfloat_really_inline uint64_t __emulu(uint32_t x, uint32_t y) {
-  return x * (uint64_t)y;
+    return x * (uint64_t)y;
 }
-fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+#endif
+
+// slow emulation routine for 32-bit
+#if !defined(__MINGW64__)
+fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd,
+                                          uint64_t *hi) {
   uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
   uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
   uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
@@ -89,56 +152,103 @@ fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi
         (adbc_carry << 32) + !!(lo < bd);
   return lo;
 }
-#endif
+#endif // !__MINGW64__
 
-fastfloat_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
-  value128 answer;
-#ifdef _M_ARM64
-  // ARM64 has native support for 64-bit multiplications, no need to emultate
-  answer.high = __umulh(value1, value2);
-  answer.low = value1 * value2;
-#else
-  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
-#endif // _M_ARM64
-  return answer;
-}
+#endif // FASTFLOAT_32BIT
 
-#else
 
-// compute value1 * value2
-fastfloat_really_inline
-value128 full_multiplication(uint64_t value1, uint64_t value2) {
+// compute the full 128-bit product a*b of two 64-bit values (returned as high/low 64-bit halves)
+fastfloat_really_inline value128 full_multiplication(uint64_t a,
+                                                     uint64_t b) {
   value128 answer;
-  __uint128_t r = ((__uint128_t)value1) * value2;
+#ifdef _M_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(a, b);
+  answer.low = a * b;
+#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64))
+  answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
+#elif defined(FASTFLOAT_64BIT)
+  __uint128_t r = ((__uint128_t)a) * b;
   answer.low = uint64_t(r);
   answer.high = uint64_t(r >> 64);
+#else
+  #error Not implemented
+#endif
   return answer;
 }
 
-#endif
 
 struct adjusted_mantissa {
-  uint64_t mantissa;
-  int power2;
-  adjusted_mantissa() : mantissa(0), power2(0) {}
+  uint64_t mantissa{0};
+  int power2{0}; // a negative value indicates an invalid result
+  adjusted_mantissa() = default;
+  // C++20 alternative: bool operator==(const adjusted_mantissa &o) const = default;
+  bool operator==(const adjusted_mantissa &o) const {
+    return mantissa == o.mantissa && power2 == o.power2;
+  }
 };
 
 struct decimal {
-  uint32_t num_digits;
-  int32_t decimal_point;
-  bool negative;
-  bool truncated;
+  uint32_t num_digits{0};
+  int32_t decimal_point{0};
+  bool negative{false};
+  bool truncated{false};
   uint8_t digits[max_digits];
+  decimal() = default;
+  // Copies are not allowed since this is a fat object.
+  decimal(const decimal &) = delete;
+  // Copy-assignment is not allowed either, for the same reason.
+  decimal &operator=(const decimal &) = delete;
+  // Moves are allowed:
+  decimal(decimal &&) = default;
+  decimal &operator=(decimal &&other) = default;
+  // Generates a mantissa by truncating to 19 digits.
+  // This function should be reasonably fast.
+  // Note that the user is responsible to ensure that digits are
+  // initialized to zero when there are fewer than 19.
+  inline uint64_t to_truncated_mantissa() const {
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+    uint64_t mantissa = 0;
+    for (uint32_t i = 0; i < max_digit_without_overflow;
+         i++) {
+      mantissa = mantissa * 10 + digits[i]; // can be accelerated
+    }
+    return mantissa;
+#else
+    uint64_t val;
+    // 8 first digits
+    ::memcpy(&val, digits, sizeof(uint64_t));
+    val = val * 2561 >> 8;
+    val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+    uint64_t mantissa =
+        uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+    // 8 more digits for a total of 16
+    ::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t));
+    val = val * 2561 >> 8;
+    val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+    uint32_t eight_digits_value =
+        uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+    mantissa = 100000000 * mantissa + eight_digits_value;
+    for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow;
+         i++) {
+      mantissa = mantissa * 10 + digits[i]; // can be accelerated
+    }
+    return mantissa;
+#endif
+  }
+  // Generates an exponent matching to_truncated_mantissa()
+  inline int32_t to_truncated_exponent() const {
+    return decimal_point - int32_t(max_digit_without_overflow);
+  }
 };
 
 constexpr static double powers_of_ten_double[] = {
     1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
     1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
-constexpr static float powers_of_ten_float[] = {
-    1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10};
+constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
+                                                1e6, 1e7, 1e8, 1e9, 1e10};
 
-template <typename T>
-struct binary_format {
+template <typename T> struct binary_format {
   static constexpr int mantissa_explicit_bits();
   static constexpr int minimum_exponent();
   static constexpr int infinite_power();
@@ -151,73 +261,54 @@ struct binary_format {
   static constexpr T exact_power_of_ten(int64_t power);
 };
 
-template <>
-constexpr int binary_format<double>::mantissa_explicit_bits() {
+template <> constexpr int binary_format<double>::mantissa_explicit_bits() {
   return 52;
 }
-template <>
-constexpr int binary_format<float>::mantissa_explicit_bits() { 
+template <> constexpr int binary_format<float>::mantissa_explicit_bits() {
   return 23;
 }
 
-template <>
-constexpr int binary_format<double>::max_exponent_round_to_even() {
+template <> constexpr int binary_format<double>::max_exponent_round_to_even() {
   return 23;
 }
 
-template <>
-constexpr int binary_format<float>::max_exponent_round_to_even() {
+template <> constexpr int binary_format<float>::max_exponent_round_to_even() {
   return 10;
 }
 
-
-template <>
-constexpr int binary_format<double>::min_exponent_round_to_even() {
+template <> constexpr int binary_format<double>::min_exponent_round_to_even() {
   return -4;
 }
 
-template <>
-constexpr int binary_format<float>::min_exponent_round_to_even() {
+template <> constexpr int binary_format<float>::min_exponent_round_to_even() {
   return -17;
 }
 
-template <>
-constexpr int binary_format<double>::minimum_exponent() { 
+template <> constexpr int binary_format<double>::minimum_exponent() {
   return -1023;
 }
-template <>
-constexpr int binary_format<float>::minimum_exponent() {
+template <> constexpr int binary_format<float>::minimum_exponent() {
   return -127;
 }
 
-template <>
-constexpr int binary_format<double>::infinite_power() {
-  return 0x7FF; 
+template <> constexpr int binary_format<double>::infinite_power() {
+  return 0x7FF;
 }
-template <>
-constexpr int binary_format<float>::infinite_power() { 
+template <> constexpr int binary_format<float>::infinite_power() {
   return 0xFF;
 }
 
-template <>
-constexpr int binary_format<double>::sign_index() { 
-  return 63;
-}
-template <>
-constexpr int binary_format<float>::sign_index() {
-  return 31;
-}
+template <> constexpr int binary_format<double>::sign_index() { return 63; }
+template <> constexpr int binary_format<float>::sign_index() { return 31; }
 
-template <>
-constexpr int binary_format<double>::min_exponent_fast_path() { 
+template <> constexpr int binary_format<double>::min_exponent_fast_path() {
 #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
   return 0;
 #else
   return -22;
 #endif
 }
-template <>
-constexpr int binary_format<float>::min_exponent_fast_path() {
+template <> constexpr int binary_format<float>::min_exponent_fast_path() {
 #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
   return 0;
 #else
@@ -225,28 +316,22 @@ constexpr int binary_format<float>::min_exponent_fast_path() {
 #endif
 }
 
-
-template <>
-constexpr int binary_format<double>::max_exponent_fast_path() { 
+template <> constexpr int binary_format<double>::max_exponent_fast_path() {
   return 22;
 }
-template <>
-constexpr int binary_format<float>::max_exponent_fast_path() {
+template <> constexpr int binary_format<float>::max_exponent_fast_path() {
   return 10;
 }
 
-
-template <>
-constexpr uint64_t binary_format<double>::max_mantissa_fast_path() { 
+template <> constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
   return uint64_t(2) << mantissa_explicit_bits();
 }
-template <>
-constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
+template <> constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
   return uint64_t(2) << mantissa_explicit_bits();
 }
 
 template <>
-constexpr double binary_format<double>::exact_power_of_ten(int64_t power) { 
+constexpr double binary_format<double>::exact_power_of_ten(int64_t power) {
   return powers_of_ten_double[power];
 }
 template <>
@@ -255,9 +340,27 @@ constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
   return powers_of_ten_float[power];
 }
 
- 
+} // namespace fast_float
+} // namespace arrow_vendored
+
+// For convenience. <ostream> is included at file scope (outside our
+// namespaces) so that it can never declare `std` inside them.
+#include <ostream>
+
+namespace arrow_vendored {
+namespace fast_float {
+
+// Streams a decimal as "0.<digits> * 10 ** <decimal_point>".
+inline std::ostream &operator<<(std::ostream &out, const fast_float::decimal &d) {
+  out << "0.";
+  for (size_t i = 0; i < d.num_digits; i++) {
+    out << int32_t(d.digits[i]);
+  }
+  out << " * 10 ** " << d.decimal_point;
+  return out;
+}
 
 } // namespace fast_float
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 
 #endif
diff --git a/cpp/src/arrow/vendored/fast_float/parse_number.h b/cpp/src/arrow/vendored/fast_float/parse_number.h
index f27a6d82f91..a570a5cbfbc 100644
--- a/cpp/src/arrow/vendored/fast_float/parse_number.h
+++ b/cpp/src/arrow/vendored/fast_float/parse_number.h
@@ -2,7 +2,7 @@
 #define FASTFLOAT_PARSE_NUMBER_H
 #include "ascii_number.h"
 #include "decimal_to_binary.h"
-#include "thompson_tao.h"
+#include "simple_decimal_conversion.h"
 
 #include <cassert>
 #include <cmath>
@@ -17,7 +17,7 @@ namespace fast_float {
 namespace {
 /**
  * Special case +inf, -inf, nan, infinity, -infinity.
- * The case comparisons could be made much faster given that we know that the 
+ * The case comparisons could be made much faster given that we know that the
- * strings a null-free and fixed.
+ * strings are null-free and fixed.
  **/
 template <typename T>
@@ -25,14 +25,13 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
   from_chars_result answer;
   answer.ec = std::errc(); // be optimistic
   if (last - first >= 3) {
-    if (fastfloat_strncasecmp(first, "nan", 3) == 0) {
+    if (fastfloat_strncasecmp(first, "nan", 3)) {
       answer.ptr = first + 3;
       value = std::numeric_limits<T>::quiet_NaN();
       return answer;
     }
-    if (fastfloat_strncasecmp(first, "inf", 3) == 0) {
-
-      if ((last - first >= 8) && (fastfloat_strncasecmp(first, "infinity", 8) == 0)) {
+    if (fastfloat_strncasecmp(first, "inf", 3)) {
+      if ((last - first >= 8) && fastfloat_strncasecmp(first, "infinity", 8)) {
         answer.ptr = first + 8;
       } else {
         answer.ptr = first + 3;
@@ -41,7 +40,7 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
       return answer;
     }
     if (last - first >= 4) {
-      if ((fastfloat_strncasecmp(first, "+nan", 4) == 0) || (fastfloat_strncasecmp(first, "-nan", 4) == 0)) {
+      if (fastfloat_strncasecmp(first, "+nan", 4) || fastfloat_strncasecmp(first, "-nan", 4)) {
         answer.ptr = first + 4;
         value = std::numeric_limits<T>::quiet_NaN();
         if (first[0] == '-') {
@@ -50,8 +49,8 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
         return answer;
       }
 
-      if ((fastfloat_strncasecmp(first, "+inf", 4) == 0) || (fastfloat_strncasecmp(first, "-inf", 4) == 0)) {
-        if ((last - first >= 8) && (fastfloat_strncasecmp(first + 1, "infinity", 8) == 0)) {
+      if (fastfloat_strncasecmp(first, "+inf", 4) || fastfloat_strncasecmp(first, "-inf", 4)) {
+        if ((last - first >= 9) && fastfloat_strncasecmp(first + 1, "infinity", 8)) { // sign + 8 letters needs 9 chars
           answer.ptr = first + 9;
         } else {
           answer.ptr = first + 4;
@@ -65,6 +64,7 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
     }
   }
   answer.ec = std::errc::invalid_argument;
+  answer.ptr = first;
   return answer;
 }
 } // namespace
@@ -78,7 +78,7 @@ from_chars_result from_chars(const char *first, const char *last,
 
 
   from_chars_result answer;
-  while ((first != last) && fast_float::is_space(*first)) {
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
     first++;
   }
   if (first == last) {
@@ -95,24 +95,33 @@ from_chars_result from_chars(const char *first, const char *last,
 
   if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
     value = T(pns.mantissa);
-    if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); } 
+    if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
     else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
     if (pns.negative) { value = -value; }
     return answer;
   }
   adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
-  if(am.power2 < 0) {
-    am = parse_long_mantissa<binary_format<T>>(first,last);
-  }
+  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
+  // then we need to go the long way around again. This is very uncommon.
+  if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
   uint64_t word = am.mantissa;
   word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
-  word = pns.negative 
+  word = pns.negative
   ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
-  memcpy(&value, &word, sizeof(T));
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+   if (std::is_same<T, float>::value) {
+     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
+   } else {
+     ::memcpy(&value, &word, sizeof(T));
+   }
+#else
+   // For little-endian systems:
+   ::memcpy(&value, &word, sizeof(T));
+#endif
   return answer;
 }
 
 } // namespace fast_float
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 
 #endif
diff --git a/cpp/src/arrow/vendored/fast_float/thompson_tao.h b/cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h
similarity index 91%
rename from cpp/src/arrow/vendored/fast_float/thompson_tao.h
rename to cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h
index c9ec1870c99..2dc3ee4d380 100644
--- a/cpp/src/arrow/vendored/fast_float/thompson_tao.h
+++ b/cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h
@@ -3,14 +3,15 @@
 
 /**
  * This code is meant to handle the case where we have more than 19 digits.
- * 
- * Based on work by Nigel Tao (at https://github.com/google/wuffs/)
+ *
+ * It is based on work by Nigel Tao (at https://github.com/google/wuffs/)
  * who credits Ken Thompson for the design (via a reference to the Go source
- * code). See
- * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c
- * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c
+ * code).
+ *
+ * Rob Pike suggested that this algorithm be called "Simple Decimal Conversion".
+ *
  * It is probably not very fast but it is a fallback that should almost never
- * be used in reallife.
+ * be used in real life. Though it is not fast, it is "easily" understood and debugged.
  **/
 #include "ascii_number.h"
 #include "decimal_to_binary.h"
@@ -28,22 +29,9 @@ inline void trim(decimal &h) {
   }
 }
 
-#if 0
-/** If you ever want to see what is going on, the following function might prove handy:
- * **/
-void print(const decimal d, int32_t exp2 = 0) {
-  printf("0.");
-  for(size_t i = 0; i < d.num_digits; i++) {
-    printf("%d", int(d.digits[i]));
-  }
-  printf(" * 10 **%d ", d.decimal_point);
-  printf(" * 2 **%d ", exp2);
-}
-#endif
-
 
 
-uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
+uint32_t number_of_digits_decimal_left_shift(const decimal &h, uint32_t shift) {
   shift &= 63;
   const static uint16_t number_of_digits_decimal_left_shift_table[65] = {
     0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
@@ -136,8 +124,6 @@ uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
   return num_new_digits;
 }
 
-} // end of anonymous namespace
-
 uint64_t round(decimal &h) {
   if ((h.num_digits == 0) || (h.decimal_point < 0)) {
     return 0;
@@ -152,7 +138,7 @@ uint64_t round(decimal &h) {
   }
   bool round_up = false;
   if (dp < h.num_digits) {
-    round_up = h.digits[dp] >= 5; // normally, we round up    
+    round_up = h.digits[dp] >= 5; // normally, we round up  
     // but we may need to round to even!
     if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) {
       round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1]));
@@ -253,6 +239,7 @@ void decimal_right_shift(decimal &h, uint32_t shift) {
   trim(h);
 }
 
+} // end of anonymous namespace
 
 template <typename binary>
 adjusted_mantissa compute_float(decimal &d) {
@@ -266,21 +253,21 @@ adjusted_mantissa compute_float(decimal &d) {
   // At this point, going further, we can assume that d.num_digits > 0.
   //
   // We want to guard against excessive decimal point values because
-  // they can result in long running times. Indeed, we do 
+  // they can result in long running times. Indeed, we do
   // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
   // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
   // fine (runs for a long time).
   //
   if(d.decimal_point < -324) {
     // We have something smaller than 1e-324 which is always zero
-    // in binary64 and binary32. 
+    // in binary64 and binary32.
     // It should be zero.
     answer.power2 = 0;
     answer.mantissa = 0;
     return answer;
   } else if(d.decimal_point >= 310) {
     // We have something at least as large as 0.1e310 which is
-    // always infinite.    
+    // always infinite.  
     answer.power2 = binary::infinite_power();
     answer.mantissa = 0;
     return answer;
@@ -367,9 +354,20 @@ adjusted_mantissa compute_float(decimal &d) {
 template <typename binary>
 adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     decimal d = parse_decimal(first, last);
+    // In some cases we can get lucky and looking at only the first 19 digits is enough.
+    // Let us try that before falling back to the slow decimal conversion.
+    const uint64_t mantissa = d.to_truncated_mantissa();
+    const int64_t exponent =  d.to_truncated_exponent();
+    // credit: R. Oudompheng who first implemented this fast path (to my knowledge).
+    // It is rough, but it does the job of accelerating the slow path since most
+    // long streams of digits are determined after 19 digits.
+    adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
+    adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
+    // Truncated mantissa and mantissa+1 must agree and be valid (power2 >= 0) to be returned.
+    if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; }
     return compute_float<binary>(d);
 }
 
 } // namespace fast_float
-}  // namespace arrow_vendored
+} // namespace arrow_vendored
 #endif