diff --git a/cpp/src/arrow/ipc/json_internal.cc b/cpp/src/arrow/ipc/json_internal.cc index 1f2c44e7224..cdaf0bbd17e 100644 --- a/cpp/src/arrow/ipc/json_internal.cc +++ b/cpp/src/arrow/ipc/json_internal.cc @@ -39,6 +39,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" +#include "arrow/util/parsing.h" #include "arrow/util/string.h" #include "arrow/visitor_inline.h" @@ -398,36 +399,34 @@ class ArrayWriter { } template - typename std::enable_if::value, void>::type WriteDataValues( - const T& arr) { - static const char null_string[] = "0"; + enable_if_integer_repr WriteDataValues(const T& arr) { + using c_type = typename T::TypeClass::c_type; const auto data = arr.raw_values(); - for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsValid(i)) { - writer_->Int64(data[i]); - } else { - writer_->RawNumber(null_string, sizeof(null_string)); + + if (sizeof(c_type) < sizeof(int64_t)) { + for (int64_t i = 0; i < arr.length(); ++i) { + if (arr.IsValid(i)) { + if (std::is_signed::value) { + writer_->Int(static_cast(data[i])); + } else { + writer_->Uint(static_cast(data[i])); + } + } else { + writer_->RawNumber("0", sizeof("0")); + } } + return; } - } - template - typename std::enable_if::value, void>::type WriteDataValues( - const T& arr) { - static const char null_string[] = "0"; - const auto data = arr.raw_values(); + // write strings instead of numbers since numbers can't represent all 64 bit integers for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsValid(i)) { - writer_->Uint64(data[i]); - } else { - writer_->RawNumber(null_string, sizeof(null_string)); - } + auto str = std::to_string(data[i]); + writer_->String(str.c_str(), static_cast(str.size())); } } template - typename std::enable_if::value, void>::type WriteDataValues( - const T& arr) { + enable_if_floating_point WriteDataValues(const T& arr) { static const char null_string[] = "0."; const auto data = arr.raw_values(); for (int64_t i = 0; i < arr.length(); ++i) { @@ -1054,30 +1053,34 @@ static Status GetField(const rj::Value& obj, DictionaryMemo* dictionary_memo, return Status::OK(); } -template -inline typename std::enable_if::value, typename T::c_type>::type -UnboxValue(const rj::Value& val) { - DCHECK(val.IsInt64()); - return static_cast(val.GetInt64()); -} +template +enable_if_integer_repr UnboxValue( + const rj::Value& val, + typename std::enable_if::value>::type* = nullptr) { + if (sizeof(CType) < sizeof(int64_t)) { + if (IsSignedInt::value) { + DCHECK(val.IsInt()); + return static_cast(val.GetInt()); + } + DCHECK(val.IsUint()); + return static_cast(val.GetUint()); + } -template -inline typename std::enable_if::value, typename T::c_type>::type -UnboxValue(const rj::Value& val) { - DCHECK(val.IsUint()); - return static_cast(val.GetUint64()); + ::arrow::internal::StringConverter::ArrowType> converter; + CType out; + auto success = converter(val.GetString(), val.GetStringLength(), &out); + DCHECK(success); + return out; } template -inline typename std::enable_if::value, typename T::c_type>::type -UnboxValue(const rj::Value& val) { +enable_if_floating_point UnboxValue(const rj::Value& val) { DCHECK(val.IsFloat()); return static_cast(val.GetDouble()); } template -inline typename std::enable_if::value, bool>::type -UnboxValue(const rj::Value& val) { +enable_if_boolean UnboxValue(const rj::Value& val) { DCHECK(val.IsBool()); return val.GetBool(); } diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 0e4439db864..8d4ffb0e4f0 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -414,6 +414,11 @@ struct CTypeTraits> { // Useful type predicates // +template +struct as_void { + using type = void; +}; + template using is_number_type = std::is_base_of; @@ -441,6 +446,16 @@ struct is_8bit_int { (std::is_same::value || std::is_same::value); }; +template +struct is_integer_repr_type : std::false_type {}; + +template <> +struct is_integer_repr_type : std::false_type {}; + +template +struct is_integer_repr_type::type> + : std::is_integral {}; + template struct is_any_string_type { static constexpr bool value = @@ -457,6 +472,10 @@ using enable_if_primitive_ctype = template using enable_if_integer = typename std::enable_if::value, R>::type; +template +using enable_if_integer_repr = + typename std::enable_if::value, R>::type; + template using is_signed_integer = std::integral_constant::value && diff --git a/integration/integration_test.py b/integration/integration_test.py index be20b4ee233..cb5510de50a 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -179,6 +179,16 @@ def _get_buffers(self): ] +class IntegerColumn(PrimitiveColumn): + + def __init__(self, name, count, is_valid, values, bit_width): + super(IntegerColumn, self).__init__(name, count, is_valid, values) + self.bit_width = bit_width + + def _encode_value(self, x): + return x if self.bit_width < 64 else str(x) + + TEST_INT_MAX = 2 ** 31 - 1 TEST_INT_MIN = ~TEST_INT_MAX @@ -225,7 +235,7 @@ def generate_range(self, size, lower, upper, name=None): if name is None: name = self.name - return PrimitiveColumn(name, size, is_valid, values) + return IntegerColumn(name, size, is_valid, values, self.bit_width) class DateType(IntegerType):