Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 39 additions & 36 deletions cpp/src/arrow/ipc/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/util/parsing.h"
#include "arrow/util/string.h"
#include "arrow/visitor_inline.h"

Expand Down Expand Up @@ -398,36 +399,34 @@ class ArrayWriter {
}

template <typename T>
typename std::enable_if<IsSignedInt<T>::value, void>::type WriteDataValues(
const T& arr) {
static const char null_string[] = "0";
enable_if_integer_repr<typename T::TypeClass> WriteDataValues(const T& arr) {
using c_type = typename T::TypeClass::c_type;
const auto data = arr.raw_values();
for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsValid(i)) {
writer_->Int64(data[i]);
} else {
writer_->RawNumber(null_string, sizeof(null_string));

if (sizeof(c_type) < sizeof(int64_t)) {
for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsValid(i)) {
if (std::is_signed<c_type>::value) {
writer_->Int(static_cast<int32_t>(data[i]));
} else {
writer_->Uint(static_cast<uint32_t>(data[i]));
}
} else {
writer_->RawNumber("0", sizeof("0"));
}
}
return;
}
}

template <typename T>
typename std::enable_if<IsUnsignedInt<T>::value, void>::type WriteDataValues(
const T& arr) {
static const char null_string[] = "0";
const auto data = arr.raw_values();
// write strings instead of numbers since numbers can't represent all 64 bit integers
for (int64_t i = 0; i < arr.length(); ++i) {
if (arr.IsValid(i)) {
writer_->Uint64(data[i]);
} else {
writer_->RawNumber(null_string, sizeof(null_string));
}
auto str = std::to_string(data[i]);
writer_->String(str.c_str(), static_cast<rj::SizeType>(str.size()));
}
}

template <typename T>
typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues(
const T& arr) {
enable_if_floating_point<typename T::TypeClass> WriteDataValues(const T& arr) {
static const char null_string[] = "0.";
const auto data = arr.raw_values();
for (int64_t i = 0; i < arr.length(); ++i) {
Expand Down Expand Up @@ -1054,30 +1053,34 @@ static Status GetField(const rj::Value& obj, DictionaryMemo* dictionary_memo,
return Status::OK();
}

template <typename T>
inline typename std::enable_if<IsSignedInt<T>::value, typename T::c_type>::type
UnboxValue(const rj::Value& val) {
DCHECK(val.IsInt64());
return static_cast<typename T::c_type>(val.GetInt64());
}
template <typename T, typename CType = typename T::c_type>
enable_if_integer_repr<T, CType> UnboxValue(
const rj::Value& val,
typename std::enable_if<!std::is_same<T, HalfFloatType>::value>::type* = nullptr) {
if (sizeof(CType) < sizeof(int64_t)) {
if (IsSignedInt<T>::value) {
DCHECK(val.IsInt());
return static_cast<CType>(val.GetInt());
}
DCHECK(val.IsUint());
return static_cast<CType>(val.GetUint());
}

template <typename T>
inline typename std::enable_if<IsUnsignedInt<T>::value, typename T::c_type>::type
UnboxValue(const rj::Value& val) {
DCHECK(val.IsUint());
return static_cast<typename T::c_type>(val.GetUint64());
::arrow::internal::StringConverter<typename CTypeTraits<CType>::ArrowType> converter;
CType out;
auto success = converter(val.GetString(), val.GetStringLength(), &out);
DCHECK(success);
return out;
}

template <typename T>
inline typename std::enable_if<IsFloatingPoint<T>::value, typename T::c_type>::type
UnboxValue(const rj::Value& val) {
enable_if_floating_point<T, typename T::c_type> UnboxValue(const rj::Value& val) {
DCHECK(val.IsFloat());
return static_cast<typename T::c_type>(val.GetDouble());
}

template <typename T>
inline typename std::enable_if<std::is_base_of<BooleanType, T>::value, bool>::type
UnboxValue(const rj::Value& val) {
enable_if_boolean<T, bool> UnboxValue(const rj::Value& val) {
DCHECK(val.IsBool());
return val.GetBool();
}
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,11 @@ struct CTypeTraits<Optional, enable_if_optional_like<Optional>> {
// Useful type predicates
//

template <typename T>
struct as_void {
using type = void;
};

template <typename T>
using is_number_type = std::is_base_of<NumberType, T>;

Expand Down Expand Up @@ -441,6 +446,16 @@ struct is_8bit_int {
(std::is_same<UInt8Type, T>::value || std::is_same<Int8Type, T>::value);
};

template <typename T, typename CType = void>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should put this in json_internal.cc unless we anticipate using it elsewhere

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think IsSignedInt etc should be removed in favor of this trait or one like it (though it should probably be renamed).

IsSignedInt takes either an array or a type as a type argument, which is surprisingly atypical for traits. Furthermore whereas is_signed_integer returns false for date and other types which are represented by but not identical to integers IsSignedInt returns true by checking only the c_type, which leads to static_assert(IsSignedInt<HalfFloatType>::value, ""). Finally the declaration of IsSignedInt is far from readable due to nested macro usage

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

struct is_integer_repr_type : std::false_type {};

template <>
struct is_integer_repr_type<HalfFloatType> : std::false_type {};

template <typename T>
struct is_integer_repr_type<T, typename as_void<typename T::c_type>::type>
: std::is_integral<typename T::c_type> {};

template <typename T>
struct is_any_string_type {
static constexpr bool value =
Expand All @@ -457,6 +472,10 @@ using enable_if_primitive_ctype =
template <typename T, typename R = void>
using enable_if_integer = typename std::enable_if<is_integer_type<T>::value, R>::type;

template <typename T, typename R = void>
using enable_if_integer_repr =
typename std::enable_if<is_integer_repr_type<T>::value, R>::type;

template <typename T>
using is_signed_integer =
std::integral_constant<bool, is_integer_type<T>::value &&
Expand Down
12 changes: 11 additions & 1 deletion integration/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,16 @@ def _get_buffers(self):
]


class IntegerColumn(PrimitiveColumn):

def __init__(self, name, count, is_valid, values, bit_width):
super(IntegerColumn, self).__init__(name, count, is_valid, values)
self.bit_width = bit_width

def _encode_value(self, x):
return x if self.bit_width < 64 else str(x)


TEST_INT_MAX = 2 ** 31 - 1
TEST_INT_MIN = ~TEST_INT_MAX

Expand Down Expand Up @@ -225,7 +235,7 @@ def generate_range(self, size, lower, upper, name=None):

if name is None:
name = self.name
return PrimitiveColumn(name, size, is_valid, values)
return IntegerColumn(name, size, is_valid, values, self.bit_width)


class DateType(IntegerType):
Expand Down