diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 4bf0b8ef4c6..1b387cd7cc6 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -1569,6 +1569,20 @@ class RepeatedArrayFactory { return FinishFixedWidth(value.data(), value.size()); } + Status Visit(const DictionaryType& type) { + std::shared_ptr dictionary, indices; + + const auto& value = checked_cast(scalar_).value; + RETURN_NOT_OK(MakeArrayFromScalar(pool_, *value, 1, &dictionary)); + + ARROW_ASSIGN_OR_RAISE(auto zero, MakeScalar(type.index_type(), 0)); + RETURN_NOT_OK(MakeArrayFromScalar(pool_, *zero, length_, &indices)); + + *out_ = std::make_shared(scalar_.type, std::move(indices), + std::move(dictionary)); + return Status::OK(); + } + Status Visit(const DataType& type) { return Status::NotImplemented("construction from scalar of type ", *scalar_.type); } diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 0a085aceb5e..d6ee4f1d26a 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -35,8 +35,8 @@ class MemoryPool; // Helper functions struct DictionaryBuilderCase { - template - Status Visit(const ValueType&, typename ValueType::c_type* = nullptr) { + template + Status Visit(const ValueType&) { return CreateFor(); } diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc index 8741d169eab..4cc748920c9 100644 --- a/cpp/src/arrow/dataset/dataset_test.cc +++ b/cpp/src/arrow/dataset/dataset_test.cc @@ -403,13 +403,21 @@ TEST_F(TestEndToEnd, EndToEndSingleDataset) { AssertTablesEqual(*expected, *table, false, true); } +inline std::shared_ptr SchemaFromNames(const std::vector names) { + std::vector> fields; + for (const auto& name : names) { + fields.push_back(field(name, int32())); + } + + return schema(fields); +} + class TestSchemaUnification : public TestDataset { public: using i32 = util::optional; + using PathAndContent = std::vector>; - void SetUp() { - using PathAndContent = std::vector>; - + void SetUp() override { // The following test creates 2 sources with divergent but compatible // schemas. Each source have a common partitioning where the // fields are not materialized in the data fragments. @@ -443,7 +451,7 @@ class TestSchemaUnification : public TestDataset { auto get_source = [this](std::string base, std::vector paths) -> Result> { - auto resolver = [this](const FileSource& source) -> std::shared_ptr { + auto resolver = [](const FileSource& source) -> std::shared_ptr { auto path = source.path(); // A different schema for each data fragment. if (path == ds1_df1) { @@ -488,15 +496,6 @@ class TestSchemaUnification : public TestDataset { std::make_shared(schema_, DatasetVector{ds1, ds2}); } - std::shared_ptr SchemaFromNames(const std::vector names) { - std::vector> fields; - for (const auto& name : names) { - fields.push_back(field(name, int32())); - } - - return schema(fields); - } - template void AssertScanEquals(std::shared_ptr scanner, const std::vector& expected_rows) { @@ -642,5 +641,40 @@ TEST_F(TestSchemaUnification, SelectMixedColumnsAndFilter) { AssertBuilderEquals(scan_builder, rows); } +TEST(TestDictPartitionColumn, SelectPartitionColumnFilterPhysicalColumn) { + auto partition_field = field("part", dictionary(int32(), utf8())); + auto path = "/dataset/part=one/data.json"; + + auto mock_fs = std::make_shared(fs::kNoTime); + ARROW_EXPECT_OK(mock_fs->CreateFile(path, R"([ {"phy_1": 111, "phy_2": 211} ])", + /* recursive */ true)); + + auto physical_schema = SchemaFromNames({"phy_1", "phy_2"}); + auto format = std::make_shared( + [=](const FileSource&) { return physical_schema; }); + + FileSystemFactoryOptions options; + options.partition_base_dir = "/dataset"; + options.partitioning = std::make_shared(schema({partition_field})); + + ASSERT_OK_AND_ASSIGN(auto factory, + FileSystemDatasetFactory::Make(mock_fs, {path}, format, options)); + + ASSERT_OK_AND_ASSIGN(auto schema, factory->Inspect()); + + ASSERT_OK_AND_ASSIGN(auto dataset, factory->Finish(schema)); + + // Selects re-ordered virtual column with a filter on a physical column + ASSERT_OK_AND_ASSIGN(auto scan_builder, dataset->NewScan()); + ASSERT_OK(scan_builder->Filter("phy_1"_ == 111)); + + ASSERT_OK(scan_builder->Project({"part"})); + + ASSERT_OK_AND_ASSIGN(auto scanner, scan_builder->Finish()); + ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable()); + AssertArraysEqual(*table->column(0)->chunk(0), + *DictArrayFromJSON(partition_field->type(), "[0]", "[\"one\"]")); +} + } // namespace dataset } // namespace arrow diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index 06320b844d2..c301417c43e 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -660,7 +660,7 @@ class StructConverter final : public ConcreteConverter { }; // ------------------------------------------------------------------------ -// Converter for struct arrays +// Converter for union arrays class UnionConverter final : public ConcreteConverter { public: @@ -701,9 +701,8 @@ class UnionConverter final : public ConcreteConverter { return builder_->AppendNull(); } - // Append a JSON value that is either an array of N elements in order - // or an object mapping struct names to values (omitted struct members - // are mapped to null). + // Append a JSON value that must be a 2-long array, containing the type_id + // and value of the UnionArray's slot. Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return AppendNull(); @@ -822,8 +821,8 @@ Status GetConverter(const std::shared_ptr& type, return Status::OK(); } -Status ArrayFromJSON(const std::shared_ptr& type, - const util::string_view& json_string, std::shared_ptr* out) { +Status ArrayFromJSON(const std::shared_ptr& type, util::string_view json_string, + std::shared_ptr* out) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); @@ -849,6 +848,24 @@ Status ArrayFromJSON(const std::shared_ptr& type, const char* json_str return ArrayFromJSON(type, util::string_view(json_string), out); } +Status DictArrayFromJSON(const std::shared_ptr& type, + util::string_view indices_json, + util::string_view dictionary_json, std::shared_ptr* out) { + if (type->id() != Type::DICTIONARY) { + return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type); + } + + const auto& dictionary_type = checked_cast(*type); + + std::shared_ptr indices, dictionary; + RETURN_NOT_OK(ArrayFromJSON(dictionary_type.index_type(), indices_json, &indices)); + RETURN_NOT_OK( + ArrayFromJSON(dictionary_type.value_type(), dictionary_json, &dictionary)); + + return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary), + out); +} + } // namespace json } // namespace internal } // namespace ipc diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h index da6483ff155..8f6b57a4608 100644 --- a/cpp/src/arrow/ipc/json_simple.h +++ b/cpp/src/arrow/ipc/json_simple.h @@ -17,8 +17,7 @@ // Implement a simple JSON representation format for arrays -#ifndef ARROW_IPC_JSON_SIMPLE_H -#define ARROW_IPC_JSON_SIMPLE_H +#pragma once #include #include @@ -41,16 +40,18 @@ Status ArrayFromJSON(const std::shared_ptr&, const std::string& json, std::shared_ptr* out); ARROW_EXPORT -Status ArrayFromJSON(const std::shared_ptr&, const util::string_view& json, +Status ArrayFromJSON(const std::shared_ptr&, util::string_view json, std::shared_ptr* out); ARROW_EXPORT Status ArrayFromJSON(const std::shared_ptr&, const char* json, std::shared_ptr* out); +ARROW_EXPORT +Status DictArrayFromJSON(const std::shared_ptr&, util::string_view indices_json, + util::string_view dictionary_json, std::shared_ptr* out); + } // namespace json } // namespace internal } // namespace ipc } // namespace arrow - -#endif // ARROW_IPC_JSON_SIMPLE_H diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index f88dbe63c0c..940f985f5d2 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -1179,10 +1179,14 @@ TEST(TestDenseUnion, Errors) { std::shared_ptr type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]", &array)); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 8]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a valid type_id\"]", &array)); + ASSERT_RAISES(Invalid, + ArrayFromJSON(type, "[[0, 99]]", &array)); // 0 is not one of {4, 8} + ASSERT_RAISES(Invalid, + ArrayFromJSON(type, "[[4, \"\"]]", &array)); // "" is not a valid int8() + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a pair\"]", &array)); ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]", &array)); ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array)); } @@ -1192,13 +1196,37 @@ TEST(TestSparseUnion, Errors) { std::shared_ptr type = union_({field_a, field_b}, {4, 8}, UnionMode::SPARSE); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]", &array)); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 8]]", &array)); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a valid type_id\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 99]]", &array)); ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]", &array)); + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a pair\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array)); } +TEST(TestDictionary, Basics) { + auto type = dictionary(int32(), utf8()); + auto array = + DictArrayFromJSON(type, "[null, 2, 1, 0]", R"(["whiskey", "tango", "foxtrot"])"); + + auto expected_indices = ArrayFromJSON(int32(), "[null, 2, 1, 0]"); + auto expected_dictionary = ArrayFromJSON(utf8(), R"(["whiskey", "tango", "foxtrot"])"); + + ASSERT_ARRAYS_EQUAL(DictionaryArray(type, expected_indices, expected_dictionary), + *array); +} + +TEST(TestDictionary, Errors) { + auto type = dictionary(int32(), utf8()); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, + DictArrayFromJSON(type, "[\"not a valid index\"]", "[\"\"]", &array)); + ASSERT_RAISES(Invalid, DictArrayFromJSON(type, "[0, 1]", "[1]", + &array)); // dict value isn't string +} + } // namespace json } // namespace internal } // namespace ipc diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index ae53b3c19ed..9ef05271b8d 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -127,25 +127,31 @@ size_t Scalar::Hash::hash(const Scalar& scalar) { return ScalarHashImpl(scalar). StringScalar::StringScalar(std::string s) : StringScalar(Buffer::FromString(std::move(s))) {} -FixedSizeBinaryScalar::FixedSizeBinaryScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : BinaryScalar(value, type) { - ARROW_CHECK_EQ(checked_cast(*type).byte_width(), - value->size()); +FixedSizeBinaryScalar::FixedSizeBinaryScalar(std::shared_ptr value, + std::shared_ptr type) + : BinaryScalar(std::move(value), std::move(type)) { + ARROW_CHECK_EQ(checked_cast(*this->type).byte_width(), + this->value->size()); } -BaseListScalar::BaseListScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : Scalar{type, true}, value(value) {} +BaseListScalar::BaseListScalar(std::shared_ptr value, + std::shared_ptr type) + : Scalar{std::move(type), true}, value(std::move(value)) {} -BaseListScalar::BaseListScalar(const std::shared_ptr& value) - : BaseListScalar(value, value->type()) {} +BaseListScalar::BaseListScalar(std::shared_ptr value) + : Scalar(value->type(), true), value(std::move(value)) {} -FixedSizeListScalar::FixedSizeListScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : BaseListScalar(value, type) { - ARROW_CHECK_EQ(value->length(), - checked_cast(type.get())->list_size()); +FixedSizeListScalar::FixedSizeListScalar(std::shared_ptr value, + std::shared_ptr type) + : BaseListScalar(std::move(value), std::move(type)) { + ARROW_CHECK_EQ(this->value->length(), + checked_cast(*this->type).list_size()); +} + +DictionaryScalar::DictionaryScalar(std::shared_ptr type) + : Scalar(std::move(type)), + value( + MakeNullScalar(checked_cast(*this->type).value_type())) { } template @@ -173,15 +179,18 @@ struct MakeNullImpl { return Status::OK(); } - const std::shared_ptr& type_; + std::shared_ptr Finish() && { + // Should not fail. + DCHECK_OK(VisitTypeInline(*type_, this)); + return std::move(out_); + } + + std::shared_ptr type_; std::shared_ptr out_; }; -std::shared_ptr MakeNullScalar(const std::shared_ptr& type) { - MakeNullImpl impl = {type, nullptr}; - // Should not fail. - DCHECK_OK(VisitTypeInline(*type, &impl)); - return std::move(impl.out_); +std::shared_ptr MakeNullScalar(std::shared_ptr type) { + return MakeNullImpl{std::move(type), nullptr}.Finish(); } std::string Scalar::ToString() const { @@ -207,7 +216,12 @@ struct ScalarParseImpl { Status Visit(const LargeBinaryType&) { return FinishWithBuffer(); } - Status Visit(const FixedSizeBinaryType& t) { return FinishWithBuffer(); } + Status Visit(const FixedSizeBinaryType&) { return FinishWithBuffer(); } + + Status Visit(const DictionaryType& t) { + ARROW_ASSIGN_OR_RAISE(auto value, Scalar::Parse(t.value_type(), s_)); + return Finish(std::move(value)); + } Status Visit(const DataType& t) { return Status::NotImplemented("parsing scalars of type ", t); @@ -215,26 +229,27 @@ struct ScalarParseImpl { template Status Finish(Arg&& arg) { - return MakeScalar(type_, std::forward(arg)).Value(out_); + return MakeScalar(std::move(type_), std::forward(arg)).Value(&out_); } Status FinishWithBuffer() { return Finish(Buffer::FromString(s_.to_string())); } - ScalarParseImpl(const std::shared_ptr& type, util::string_view s, - std::shared_ptr* out) - : type_(type), s_(s), out_(out) {} + Result> Finish() && { + RETURN_NOT_OK(VisitTypeInline(*type_, this)); + return std::move(out_); + } + + ScalarParseImpl(std::shared_ptr type, util::string_view s) + : type_(std::move(type)), s_(s) {} - const std::shared_ptr& type_; + std::shared_ptr type_; util::string_view s_; - std::shared_ptr* out_; + std::shared_ptr out_; }; Result> Scalar::Parse(const std::shared_ptr& type, util::string_view s) { - std::shared_ptr out; - ScalarParseImpl impl = {type, s, &out}; - RETURN_NOT_OK(VisitTypeInline(*type, &impl)); - return out; + return ScalarParseImpl{type, s}.Finish(); } namespace internal { diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index 4a21689339a..e3880f62ae9 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -48,7 +48,7 @@ class Array; struct ARROW_EXPORT Scalar : public util::EqualityComparable { virtual ~Scalar() = default; - explicit Scalar(const std::shared_ptr& type) : type(type), is_valid(false) {} + explicit Scalar(std::shared_ptr type) : type(std::move(type)) {} /// \brief The type of the scalar value std::shared_ptr type; @@ -79,8 +79,8 @@ struct ARROW_EXPORT Scalar : public util::EqualityComparable { Result> CastTo(std::shared_ptr to) const; protected: - Scalar(const std::shared_ptr& type, bool is_valid) - : type(type), is_valid(is_valid) {} + Scalar(std::shared_ptr type, bool is_valid) + : type(std::move(type)), is_valid(is_valid) {} }; /// \brief A scalar value for NullType. Never valid @@ -100,9 +100,9 @@ struct ARROW_EXPORT PrimitiveScalar : public Scalar { using ValueType = CType; // Non-null constructor. - PrimitiveScalar(ValueType value, const std::shared_ptr& type) - : Scalar(type, true), value(value) { - ARROW_CHECK_EQ(type->id(), T::type_id); + PrimitiveScalar(ValueType value, std::shared_ptr type) + : Scalar(std::move(type), true), value(value) { + ARROW_CHECK_EQ(this->type->id(), T::type_id); } explicit PrimitiveScalar(ValueType value) @@ -175,21 +175,19 @@ struct ARROW_EXPORT BaseBinaryScalar : public Scalar { std::shared_ptr value; protected: - BaseBinaryScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : Scalar{type, true}, value(value) {} + BaseBinaryScalar(std::shared_ptr value, std::shared_ptr type) + : Scalar{std::move(type), true}, value(std::move(value)) {} }; struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryScalar; - BinaryScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : BaseBinaryScalar(value, type) {} + BinaryScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)) {} - explicit BinaryScalar(const std::shared_ptr& value) - : BinaryScalar(value, binary()) {} + explicit BinaryScalar(std::shared_ptr value) + : BinaryScalar(std::move(value), binary()) {} BinaryScalar() : BinaryScalar(binary()) {} }; @@ -198,8 +196,8 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar { using BinaryScalar::BinaryScalar; using TypeClass = StringType; - explicit StringScalar(const std::shared_ptr& value) - : StringScalar(value, utf8()) {} + explicit StringScalar(std::shared_ptr value) + : StringScalar(std::move(value), utf8()) {} explicit StringScalar(std::string s); @@ -210,12 +208,11 @@ struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = LargeBinaryScalar; - LargeBinaryScalar(const std::shared_ptr& value, - const std::shared_ptr& type) - : BaseBinaryScalar(value, type) {} + LargeBinaryScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)) {} - explicit LargeBinaryScalar(const std::shared_ptr& value) - : LargeBinaryScalar(value, large_binary()) {} + explicit LargeBinaryScalar(std::shared_ptr value) + : LargeBinaryScalar(std::move(value), large_binary()) {} LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {} }; @@ -224,8 +221,8 @@ struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar { using LargeBinaryScalar::LargeBinaryScalar; using TypeClass = LargeStringType; - explicit LargeStringScalar(const std::shared_ptr& value) - : LargeStringScalar(value, large_utf8()) {} + explicit LargeStringScalar(std::shared_ptr value) + : LargeStringScalar(std::move(value), large_utf8()) {} LargeStringScalar() : LargeStringScalar(large_utf8()) {} }; @@ -233,11 +230,9 @@ struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar { struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar { using TypeClass = FixedSizeBinaryType; - FixedSizeBinaryScalar(const std::shared_ptr& value, - const std::shared_ptr& type); + FixedSizeBinaryScalar(std::shared_ptr value, std::shared_ptr type); - explicit FixedSizeBinaryScalar(const std::shared_ptr& type) - : BinaryScalar(type) {} + explicit FixedSizeBinaryScalar(std::shared_ptr type) : BinaryScalar(type) {} }; template @@ -246,10 +241,11 @@ struct ARROW_EXPORT TemporalScalar : public Scalar { using TypeClass = T; using ValueType = typename T::c_type; - TemporalScalar(ValueType value, const std::shared_ptr& type) - : Scalar(type, true), value(value) {} + TemporalScalar(ValueType value, std::shared_ptr type) + : Scalar(std::move(type), true), value(value) {} - explicit TemporalScalar(const std::shared_ptr& type) : Scalar(type, false) {} + explicit TemporalScalar(std::shared_ptr type) + : Scalar(std::move(type), false) {} ValueType value; }; @@ -260,7 +256,7 @@ struct ARROW_EXPORT DateScalar : public TemporalScalar { using ValueType = typename TemporalScalar::ValueType; explicit DateScalar(ValueType value) - : TemporalScalar(value, TypeTraits::type_singleton()) {} + : TemporalScalar(std::move(value), TypeTraits::type_singleton()) {} DateScalar() : TemporalScalar(TypeTraits::type_singleton()) {} }; @@ -316,8 +312,8 @@ struct ARROW_EXPORT Decimal128Scalar : public Scalar { using TypeClass = Decimal128Type; using ValueType = Decimal128; - Decimal128Scalar(const Decimal128& value, const std::shared_ptr& type) - : Scalar(type, true), value(std::move(value)) {} + Decimal128Scalar(Decimal128 value, std::shared_ptr type) + : Scalar(std::move(type), true), value(value) {} Decimal128 value; }; @@ -326,10 +322,9 @@ struct ARROW_EXPORT BaseListScalar : public Scalar { using Scalar::Scalar; using ValueType = std::shared_ptr; - BaseListScalar(const std::shared_ptr& value, - const std::shared_ptr& type); + BaseListScalar(std::shared_ptr value, std::shared_ptr type); - explicit BaseListScalar(const std::shared_ptr& value); + explicit BaseListScalar(std::shared_ptr value); std::shared_ptr value; }; @@ -353,8 +348,7 @@ struct ARROW_EXPORT FixedSizeListScalar : public BaseListScalar { using TypeClass = FixedSizeListType; using BaseListScalar::BaseListScalar; - FixedSizeListScalar(const std::shared_ptr& value, - const std::shared_ptr& type); + FixedSizeListScalar(std::shared_ptr value, std::shared_ptr type); }; struct ARROW_EXPORT StructScalar : public Scalar { @@ -363,10 +357,10 @@ struct ARROW_EXPORT StructScalar : public Scalar { std::vector> value; - StructScalar(ValueType value, const std::shared_ptr& type) - : Scalar(type, true), value(std::move(value)) {} + StructScalar(ValueType value, std::shared_ptr type) + : Scalar(std::move(type), true), value(std::move(value)) {} - explicit StructScalar(const std::shared_ptr& type) : Scalar(type) {} + explicit StructScalar(std::shared_ptr type) : Scalar(std::move(type)) {} }; struct ARROW_EXPORT UnionScalar : public Scalar { @@ -375,8 +369,14 @@ struct ARROW_EXPORT UnionScalar : public Scalar { }; struct ARROW_EXPORT DictionaryScalar : public Scalar { - using Scalar::Scalar; using TypeClass = DictionaryType; + using ValueType = std::shared_ptr; + ValueType value; + + explicit DictionaryScalar(std::shared_ptr type); + + DictionaryScalar(ValueType value, std::shared_ptr type) + : Scalar(std::move(type), true), value(std::move(value)) {} }; struct ARROW_EXPORT ExtensionScalar : public Scalar { @@ -385,7 +385,7 @@ struct ARROW_EXPORT ExtensionScalar : public Scalar { }; ARROW_EXPORT -std::shared_ptr MakeNullScalar(const std::shared_ptr& type); +std::shared_ptr MakeNullScalar(std::shared_ptr type); namespace internal { @@ -394,33 +394,20 @@ inline Status CheckBufferLength(...) { return Status::OK(); } ARROW_EXPORT Status CheckBufferLength(const FixedSizeBinaryType* t, const std::shared_ptr* b); -template -struct is_simple_scalar : std::false_type {}; - -template -struct is_simple_scalar< - T, - typename std::enable_if< - // scalar has a single extra data member named "value" with type "ValueType" - std::is_same().value), typename T::ValueType>::value && - // scalar is constructible from (value, type) - std::is_constructible>::value>::type> : std::true_type { -}; - } // namespace internal template struct MakeScalarImpl { - template < - typename T, typename ScalarType = typename TypeTraits::ScalarType, - typename ValueType = typename ScalarType::ValueType, - typename Enable = typename std::enable_if< - internal::is_simple_scalar::value && - std::is_same::type>::value>::type> + template ::ScalarType, + typename ValueType = typename ScalarType::ValueType, + typename Enable = typename std::enable_if< + std::is_constructible>::value && + std::is_convertible::value>::type> Status Visit(const T& t) { ARROW_RETURN_NOT_OK(internal::CheckBufferLength(&t, &value_)); - *out_ = std::make_shared(ValueType(static_cast(value_)), type_); + out_ = std::make_shared( + static_cast(static_cast(value_)), std::move(type_)); return Status::OK(); } @@ -428,18 +415,20 @@ struct MakeScalarImpl { return Status::NotImplemented("constructing scalars of type ", t, " from ", value_); } - const std::shared_ptr& type_; + Result> Finish() && { + ARROW_RETURN_NOT_OK(VisitTypeInline(*type_, this)); + return std::move(out_); + } + + std::shared_ptr type_; ValueRef value_; - std::shared_ptr* out_; + std::shared_ptr out_; }; template -Result> MakeScalar(const std::shared_ptr& type, +Result> MakeScalar(std::shared_ptr type, Value&& value) { - std::shared_ptr out; - MakeScalarImpl impl = {type, std::forward(value), &out}; - ARROW_RETURN_NOT_OK(VisitTypeInline(*type, &impl)); - return out; + return MakeScalarImpl{type, std::forward(value), NULLPTR}.Finish(); } /// \brief type inferring scalar factory @@ -452,7 +441,7 @@ std::shared_ptr MakeScalar(Value value) { } inline std::shared_ptr MakeScalar(std::string value) { - return std::make_shared(value); + return std::make_shared(std::move(value)); } } // namespace arrow diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index 009ee80b8a7..d0d864b3e74 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -218,6 +218,15 @@ std::shared_ptr ArrayFromJSON(const std::shared_ptr& type, return out; } +std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, + util::string_view indices_json, + util::string_view dictionary_json) { + std::shared_ptr out; + ABORT_NOT_OK( + ipc::internal::json::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); + return out; +} + std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr& type, const std::vector& json) { ArrayVector out_chunks; diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index cd19628ad14..850a52ef814 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -256,8 +256,14 @@ ARROW_EXPORT std::shared_ptr ArrayFromJSON(const std::shared_ptr&, util::string_view json); -ARROW_EXPORT std::shared_ptr RecordBatchFromJSON( - const std::shared_ptr&, util::string_view); +ARROW_EXPORT +std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, + util::string_view indices_json, + util::string_view dictionary_json); + +ARROW_EXPORT +std::shared_ptr RecordBatchFromJSON(const std::shared_ptr&, + util::string_view); ARROW_EXPORT std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr&,