apache · bkietz · Sep 3, 2019 · Sep 5, 2019 · Sep 9, 2019 · Sep 10, 2019
diff --git a/cpp/src/arrow/ipc/json_internal.cc b/cpp/src/arrow/ipc/json_internal.cc
@@ -39,6 +39,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/parsing.h"
 #include "arrow/util/string.h"
 #include "arrow/visitor_inline.h"
 
@@ -398,36 +399,34 @@ class ArrayWriter {
   }
 
   template <typename T>
-  typename std::enable_if<IsSignedInt<T>::value, void>::type WriteDataValues(
-      const T& arr) {
-    static const char null_string[] = "0";
+  enable_if_integer_repr<typename T::TypeClass> WriteDataValues(const T& arr) {
+    using c_type = typename T::TypeClass::c_type;
     const auto data = arr.raw_values();
-    for (int64_t i = 0; i < arr.length(); ++i) {
-      if (arr.IsValid(i)) {
-        writer_->Int64(data[i]);
-      } else {
-        writer_->RawNumber(null_string, sizeof(null_string));
+
+    if (sizeof(c_type) < sizeof(int64_t)) {
+      for (int64_t i = 0; i < arr.length(); ++i) {
+        if (arr.IsValid(i)) {
+          if (std::is_signed<c_type>::value) {
+            writer_->Int(static_cast<int32_t>(data[i]));
+          } else {
+            writer_->Uint(static_cast<uint32_t>(data[i]));
+          }
+        } else {
+          writer_->RawNumber("0", sizeof("0"));
+        }
       }
+      return;
     }
-  }
 
-  template <typename T>
-  typename std::enable_if<IsUnsignedInt<T>::value, void>::type WriteDataValues(
-      const T& arr) {
-    static const char null_string[] = "0";
-    const auto data = arr.raw_values();
+    // write strings instead of numbers since numbers can't represent all 64 bit integers
     for (int64_t i = 0; i < arr.length(); ++i) {
-      if (arr.IsValid(i)) {
-        writer_->Uint64(data[i]);
-      } else {
-        writer_->RawNumber(null_string, sizeof(null_string));
-      }
+      auto str = std::to_string(data[i]);
+      writer_->String(str.c_str(), static_cast<rj::SizeType>(str.size()));
     }
   }
 
   template <typename T>
-  typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues(
-      const T& arr) {
+  enable_if_floating_point<typename T::TypeClass> WriteDataValues(const T& arr) {
     static const char null_string[] = "0.";
     const auto data = arr.raw_values();
     for (int64_t i = 0; i < arr.length(); ++i) {
@@ -1054,30 +1053,34 @@ static Status GetField(const rj::Value& obj, DictionaryMemo* dictionary_memo,
   return Status::OK();
 }
 
-template <typename T>
-inline typename std::enable_if<IsSignedInt<T>::value, typename T::c_type>::type
-UnboxValue(const rj::Value& val) {
-  DCHECK(val.IsInt64());
-  return static_cast<typename T::c_type>(val.GetInt64());
-}
+template <typename T, typename CType = typename T::c_type>
+enable_if_integer_repr<T, CType> UnboxValue(
+    const rj::Value& val,
+    typename std::enable_if<!std::is_same<T, HalfFloatType>::value>::type* = nullptr) {
+  if (sizeof(CType) < sizeof(int64_t)) {
+    if (IsSignedInt<T>::value) {
+      DCHECK(val.IsInt());
+      return static_cast<CType>(val.GetInt());
+    }
+    DCHECK(val.IsUint());
+    return static_cast<CType>(val.GetUint());
+  }
 
-template <typename T>
-inline typename std::enable_if<IsUnsignedInt<T>::value, typename T::c_type>::type
-UnboxValue(const rj::Value& val) {
-  DCHECK(val.IsUint());
-  return static_cast<typename T::c_type>(val.GetUint64());
+  ::arrow::internal::StringConverter<typename CTypeTraits<CType>::ArrowType> converter;
+  CType out;
+  auto success = converter(val.GetString(), val.GetStringLength(), &out);
+  DCHECK(success);
+  return out;
 }
 
 template <typename T>
-inline typename std::enable_if<IsFloatingPoint<T>::value, typename T::c_type>::type
-UnboxValue(const rj::Value& val) {
+enable_if_floating_point<T, typename T::c_type> UnboxValue(const rj::Value& val) {
   DCHECK(val.IsFloat());
   return static_cast<typename T::c_type>(val.GetDouble());
 }
 
 template <typename T>
-inline typename std::enable_if<std::is_base_of<BooleanType, T>::value, bool>::type
-UnboxValue(const rj::Value& val) {
+enable_if_boolean<T, bool> UnboxValue(const rj::Value& val) {
   DCHECK(val.IsBool());
   return val.GetBool();
 }

diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
@@ -414,6 +414,11 @@ struct CTypeTraits<Optional, enable_if_optional_like<Optional>> {
 // Useful type predicates
 //
 
+template <typename T>
+struct as_void {
+  using type = void;
+};
+
 template <typename T>
 using is_number_type = std::is_base_of<NumberType, T>;
 
@@ -441,6 +446,16 @@ struct is_8bit_int {
       (std::is_same<UInt8Type, T>::value || std::is_same<Int8Type, T>::value);
 };
 
+template <typename T, typename CType = void>
+struct is_integer_repr_type : std::false_type {};
+
+template <>
+struct is_integer_repr_type<HalfFloatType> : std::false_type {};
+
+template <typename T>
+struct is_integer_repr_type<T, typename as_void<typename T::c_type>::type>
+    : std::is_integral<typename T::c_type> {};
+
 template <typename T>
 struct is_any_string_type {
   static constexpr bool value =
@@ -457,6 +472,10 @@ using enable_if_primitive_ctype =
 template <typename T, typename R = void>
 using enable_if_integer = typename std::enable_if<is_integer_type<T>::value, R>::type;
 
+template <typename T, typename R = void>
+using enable_if_integer_repr =
+    typename std::enable_if<is_integer_repr_type<T>::value, R>::type;
+
 template <typename T>
 using is_signed_integer =
     std::integral_constant<bool, is_integer_type<T>::value &&

diff --git a/integration/integration_test.py b/integration/integration_test.py
@@ -179,6 +179,16 @@ def _get_buffers(self):
         ]
 
 
+class IntegerColumn(PrimitiveColumn):
+
+    def __init__(self, name, count, is_valid, values, bit_width):
+        super(IntegerColumn, self).__init__(name, count, is_valid, values)
+        self.bit_width = bit_width
+
+    def _encode_value(self, x):
+        return x if self.bit_width < 64 else str(x)
+
+
 TEST_INT_MAX = 2 ** 31 - 1
 TEST_INT_MIN = ~TEST_INT_MAX
 
@@ -225,7 +235,7 @@ def generate_range(self, size, lower, upper, name=None):
 
         if name is None:
             name = self.name
-        return PrimitiveColumn(name, size, is_valid, values)
+        return IntegerColumn(name, size, is_valid, values, self.bit_width)
 
 
 class DateType(IntegerType):