From 253c9805db2006f37feb837bd14c0ee7a614dfdc Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 30 Sep 2020 19:13:32 +0200 Subject: [PATCH 1/2] ARROW-7372: [C++] Allow creating dictionary array from simple JSON Simple value types are supported: integers, string-like, decimal --- cpp/src/arrow/array/builder_dict.h | 6 + cpp/src/arrow/builder.cc | 1 + cpp/src/arrow/builder.h | 4 +- cpp/src/arrow/ipc/json_simple.cc | 178 +++++++++++++++++----- cpp/src/arrow/ipc/json_simple_test.cc | 205 ++++++++++++++++++-------- 5 files changed, 297 insertions(+), 97 deletions(-) diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index db3db0fa6ab..c5db0d157c9 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -190,6 +190,12 @@ class DictionaryBuilderBase : public ArrayBuilder { /// \brief The current number of entries in the dictionary int64_t dictionary_length() const { return memo_table_->size(); } + /// \brief The value byte width (for FixedSizeBinaryType) + template + enable_if_fixed_size_binary byte_width() const { + return byte_width_; + } + /// \brief Append a scalar value Status Append(Value value) { ARROW_RETURN_NOT_OK(Reserve(1)); diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index e07e9ad46cc..1dcbf7851ab 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -50,6 +50,7 @@ struct DictionaryBuilderCase { return Create>(); } Status Visit(const FixedSizeBinaryType&) { return CreateFor(); } + Status Visit(const Decimal128Type&) { return CreateFor(); } Status Visit(const DataType& value_type) { return NotImplemented(value_type); } Status Visit(const HalfFloatType& value_type) { return NotImplemented(value_type); } diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 3202312c47e..54ff2904b91 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -39,7 +39,7 @@ class MemoryPool; /// \brief Construct an empty ArrayBuilder corresponding to the data /// type /// \param[in] pool the MemoryPool to use for allocations -/// \param[in] type an instance of DictionaryType +/// \param[in] type the data type to create the builder for /// \param[out] out the created ArrayBuilder ARROW_EXPORT Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, @@ -48,7 +48,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, /// \brief Construct an empty DictionaryBuilder initialized optionally /// with a pre-existing dictionary /// \param[in] pool the MemoryPool to use for allocations -/// \param[in] type an instance of DictionaryType +/// \param[in] type the dictionary type to create the builder for /// \param[in] dictionary the initial dictionary, if any. May be nullptr /// \param[out] out the created ArrayBuilder ARROW_EXPORT diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index d307373c129..06c9adb976e 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -27,6 +27,7 @@ #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/logging.h" #include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" @@ -50,13 +51,35 @@ namespace json { using ::arrow::internal::checked_cast; using ::arrow::internal::checked_pointer_cast; -static constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; +namespace { -static Status JSONTypeError(const char* expected_type, rj::Type json_type) { +constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; + +Status JSONTypeError(const char* expected_type, rj::Type json_type) { return Status::Invalid("Expected ", expected_type, " or null, got JSON type ", json_type); } +template +struct RegularBuilderTraits { + using BuilderType = typename TypeTraits::BuilderType; + + static const std::shared_ptr& value_type( + const std::shared_ptr& type) { + return type; + } +}; + +template +struct DictionaryBuilderTraits { + using BuilderType = DictionaryBuilder; + + static const std::shared_ptr& value_type( + const std::shared_ptr& type) { + return checked_cast(*type).value_type(); + } +}; + class Converter { public: virtual ~Converter() = default; @@ -213,9 +236,12 @@ enable_if_physical_floating_point ConvertNumber(const rj::Value& json // ------------------------------------------------------------------------ // Converter for int arrays -template -class IntegerConverter final : public ConcreteConverter> { +template class BuilderTraits = RegularBuilderTraits> +class IntegerConverter final + : public ConcreteConverter> { + using BuilderType = typename BuilderTraits::BuilderType; using c_type = typename Type::c_type; + static constexpr auto is_signed = std::is_signed::value; public: @@ -224,7 +250,8 @@ class IntegerConverter final : public ConcreteConverter> Status Init() override { std::unique_ptr builder; RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder)); - builder_ = checked_pointer_cast>(std::move(builder)); + builder_ = checked_pointer_cast(std::move(builder)); + DCHECK(builder_); return Status::OK(); } @@ -240,20 +267,27 @@ class IntegerConverter final : public ConcreteConverter> std::shared_ptr builder() override { return builder_; } private: - std::shared_ptr> builder_; + std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for float arrays -template -class FloatConverter final : public ConcreteConverter> { +template class BuilderTraits = RegularBuilderTraits> +class FloatConverter final + : public ConcreteConverter> { + using BuilderType = typename BuilderTraits::BuilderType; using c_type = typename Type::c_type; public: - explicit FloatConverter(const std::shared_ptr& type) { - this->type_ = type; - builder_ = std::make_shared>(); + explicit FloatConverter(const std::shared_ptr& type) { this->type_ = type; } + + Status Init() override { + std::unique_ptr builder; + RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder)); + builder_ = checked_pointer_cast(std::move(builder)); + DCHECK(builder_); + return Status::OK(); } Status AppendValue(const rj::Value& json_obj) override { @@ -268,18 +302,29 @@ class FloatConverter final : public ConcreteConverter> { std::shared_ptr builder() override { return builder_; } private: - std::shared_ptr> builder_; + std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for decimal arrays -class DecimalConverter final : public ConcreteConverter { +template