Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ class DictionaryBuilderBase : public ArrayBuilder {
/// \brief The current number of entries in the dictionary
int64_t dictionary_length() const { return memo_table_->size(); }

/// \brief The value byte width (for FixedSizeBinaryType)
///
/// Returns the stored byte_width_ member as an int32_t. The
/// enable_if_fixed_size_binary return type presumably restricts this accessor
/// to builders whose value type T is fixed-size binary -- confirm against the
/// trait's definition.
template <typename T1 = T>
enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
return byte_width_;
}

Comment on lines +193 to +198
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably fine. Nit: I'd prefer to get the FixedSizeBinaryType inside the converter and use its byte_width() rather than propagating the property to builders.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The property is already exposed in FixedSizeBinaryBuilder.

/// \brief Append a scalar value
Status Append(Value value) {
ARROW_RETURN_NOT_OK(Reserve(1));
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ struct DictionaryBuilderCase {
return Create<DictionaryBuilder<LargeStringType>>();
}
Status Visit(const FixedSizeBinaryType&) { return CreateFor<FixedSizeBinaryType>(); }
Status Visit(const Decimal128Type&) { return CreateFor<Decimal128Type>(); }

Status Visit(const DataType& value_type) { return NotImplemented(value_type); }
Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); }
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class MemoryPool;
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type an instance of DictionaryType
/// \param[in] type the data type to create the builder for
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
Expand All @@ -48,7 +48,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
/// \brief Construct an empty DictionaryBuilder initialized optionally
/// with a pre-existing dictionary
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type an instance of DictionaryType
/// \param[in] type the dictionary type to create the builder for
/// \param[in] dictionary the initial dictionary, if any. May be nullptr
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Expand Down
149 changes: 108 additions & 41 deletions cpp/src/arrow/ipc/json_simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/util/string_view.h"
#include "arrow/util/value_parsing.h"

Expand All @@ -50,9 +51,11 @@ namespace json {
using ::arrow::internal::checked_cast;
using ::arrow::internal::checked_pointer_cast;

static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
namespace {

static Status JSONTypeError(const char* expected_type, rj::Type json_type) {
constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;

// Build the Status::Invalid error reported when a JSON value does not have the
// type a converter expects.
//
// expected_type: human-readable name of the accepted JSON type
// json_type: the rapidjson type tag that was actually encountered
//
// The message is assembled from the fragments passed to Status::Invalid below;
// it mentions "or null" as an acceptable alternative to expected_type.
Status JSONTypeError(const char* expected_type, rj::Type json_type) {
return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
json_type);
}
Expand Down Expand Up @@ -101,6 +104,22 @@ class ConcreteConverter : public Converter {
}
return Status::OK();
}

// Type of the values handled by this converter: the dictionary's value type
// when type_ is a dictionary type, otherwise type_ itself.
const std::shared_ptr<DataType>& value_type() {
  if (type_->id() == Type::DICTIONARY) {
    const auto& dict_type = checked_cast<const DictionaryType&>(*type_);
    return dict_type.value_type();
  }
  return type_;
}

// Create a builder for this->type_ with MakeBuilder (using the default memory
// pool) and store it in *out, downcast to the concrete BuilderType.
//
// Returns the error from MakeBuilder if creation fails; in that case *out is
// not modified. The DCHECK verifies that the stored pointer is non-null.
template <typename BuilderType>
Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) {
  std::unique_ptr<ArrayBuilder> generic_builder;
  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &generic_builder));

  auto concrete_builder = checked_pointer_cast<BuilderType>(std::move(generic_builder));
  *out = std::move(concrete_builder);
  DCHECK(*out);
  return Status::OK();
}
};

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -213,20 +232,17 @@ enable_if_physical_floating_point<T, Status> ConvertNumber(const rj::Value& json
// ------------------------------------------------------------------------
// Converter for int arrays

template <typename Type>
class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>> {
template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
class IntegerConverter final
: public ConcreteConverter<IntegerConverter<Type, BuilderType>> {
using c_type = typename Type::c_type;

static constexpr auto is_signed = std::is_signed<c_type>::value;

public:
explicit IntegerConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }

Status Init() override {
std::unique_ptr<ArrayBuilder> builder;
RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder));
builder_ = checked_pointer_cast<NumericBuilder<Type>>(std::move(builder));
return Status::OK();
}
Status Init() override { return this->MakeConcreteBuilder(&builder_); }

Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
Expand All @@ -240,21 +256,20 @@ class IntegerConverter final : public ConcreteConverter<IntegerConverter<Type>>
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }

private:
std::shared_ptr<NumericBuilder<Type>> builder_;
std::shared_ptr<BuilderType> builder_;
};

// ------------------------------------------------------------------------
// Converter for float arrays

template <typename Type>
class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
class FloatConverter final : public ConcreteConverter<FloatConverter<Type, BuilderType>> {
using c_type = typename Type::c_type;

public:
explicit FloatConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
builder_ = std::make_shared<NumericBuilder<Type>>();
}
explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }

Status Init() override { return this->MakeConcreteBuilder(&builder_); }

Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
Expand All @@ -268,20 +283,22 @@ class FloatConverter final : public ConcreteConverter<FloatConverter<Type>> {
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }

private:
std::shared_ptr<NumericBuilder<Type>> builder_;
std::shared_ptr<BuilderType> builder_;
};

// ------------------------------------------------------------------------
// Converter for decimal arrays

class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
template <typename BuilderType = typename TypeTraits<Decimal128Type>::BuilderType>
class DecimalConverter final : public ConcreteConverter<DecimalConverter<BuilderType>> {
public:
explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
decimal_type_ = checked_cast<Decimal128Type*>(type.get());
builder_ = std::make_shared<DecimalBuilder>(type);
decimal_type_ = &checked_cast<const Decimal128Type&>(*this->value_type());
}

Status Init() override { return this->MakeConcreteBuilder(&builder_); }

Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return this->AppendNull();
Expand All @@ -303,8 +320,8 @@ class DecimalConverter final : public ConcreteConverter<DecimalConverter> {
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }

private:
std::shared_ptr<DecimalBuilder> builder_;
Decimal128Type* decimal_type_;
std::shared_ptr<BuilderType> builder_;
const Decimal128Type* decimal_type_;
};

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -381,15 +398,13 @@ class DayTimeIntervalConverter final
// ------------------------------------------------------------------------
// Converter for binary and string arrays

template <typename TYPE>
class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
class StringConverter final
: public ConcreteConverter<StringConverter<Type, BuilderType>> {
public:
using BuilderType = typename TypeTraits<TYPE>::BuilderType;
explicit StringConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }

explicit StringConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
}
Status Init() override { return this->MakeConcreteBuilder(&builder_); }

Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
Expand All @@ -412,14 +427,16 @@ class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
// ------------------------------------------------------------------------
// Converter for fixed-size binary arrays

template <typename BuilderType = typename TypeTraits<FixedSizeBinaryType>::BuilderType>
class FixedSizeBinaryConverter final
: public ConcreteConverter<FixedSizeBinaryConverter> {
: public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> {
public:
explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
this->type_ = type;
builder_ = std::make_shared<FixedSizeBinaryBuilder>(type, default_memory_pool());
}

Status Init() override { return this->MakeConcreteBuilder(&builder_); }

Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return this->AppendNull();
Expand All @@ -441,7 +458,7 @@ class FixedSizeBinaryConverter final
std::shared_ptr<ArrayBuilder> builder() override { return builder_; }

private:
std::shared_ptr<FixedSizeBinaryBuilder> builder_;
std::shared_ptr<BuilderType> builder_;
};

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -720,14 +737,62 @@ class UnionConverter final : public ConcreteConverter<UnionConverter> {
// ------------------------------------------------------------------------
// General conversion functions

// Status returned when no JSON converter exists for the given data type.
// The message embeds the type's ToString() representation.
Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) {
  const auto type_description = type->ToString();
  return Status::NotImplemented("JSON conversion to ", type_description,
                                " not implemented");
}

// Create and initialize a converter that builds a dictionary-encoded array.
//
// \param[in] type the dictionary type. It is cast to DictionaryType without a
//   prior id check, so callers must only pass dictionary types.
// \param[out] out receives the initialized converter; left untouched on error
// \return NotImplemented if the dictionary's value type has no case in the
//   switch below, or the error returned by the converter's Init()
Status GetDictConverter(const std::shared_ptr<DataType>& type,
                        std::shared_ptr<Converter>* out) {
  std::shared_ptr<Converter> res;

  // Only the value type's id is inspected, so bind by reference rather than
  // copying the shared_ptr (which would cost an atomic refcount round-trip).
  const auto& value_type = checked_cast<const DictionaryType&>(*type).value_type();

// For converter templates parameterized on the builder type only.
#define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE)                    \
  case ID:                                                        \
    res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \
    break;

// For converter templates parameterized on both the value type and the
// builder type.
#define PARAM_CONVERTER_CASE(ID, CLASS, TYPE)                           \
  case ID:                                                              \
    res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \
    break;

  switch (value_type->id()) {
    PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type)
    PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type)
    PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type)
    PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type)
    PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type)
    PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
    PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
    PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
    PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
    PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
    PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)
    PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType)
    SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter,
                          FixedSizeBinaryType)
    SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter, Decimal128Type)
    default:
      return ConversionNotImplemented(type);
  }

#undef SIMPLE_CONVERTER_CASE
#undef PARAM_CONVERTER_CASE

  RETURN_NOT_OK(res->Init());
  // res is not used again; hand ownership over instead of copying.
  *out = std::move(res);
  return Status::OK();
}

Status GetConverter(const std::shared_ptr<DataType>& type,
std::shared_ptr<Converter>* out) {
std::shared_ptr<Converter> res;
if (type->id() == Type::DICTIONARY) {
return GetDictConverter(type, out);
}

auto not_implemented = [&]() -> Status {
return Status::NotImplemented("JSON conversion to ", type->ToString(),
" not implemented");
};
std::shared_ptr<Converter> res;

#define SIMPLE_CONVERTER_CASE(ID, CLASS) \
case ID: \
Expand Down Expand Up @@ -763,14 +828,14 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>)
SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter<>)
SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter)
SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter)
SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter<MonthIntervalType>)
SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter)
default:
return not_implemented();
return ConversionNotImplemented(type);
}

#undef SIMPLE_CONVERTER_CASE
Expand All @@ -780,6 +845,8 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
return Status::OK();
}

} // namespace

Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view json_string,
std::shared_ptr<Array>* out) {
std::shared_ptr<Converter> converter;
Expand Down
Loading