diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 32af0a00ad1..5b560591235 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1530,6 +1530,9 @@ if(ARROW_WITH_PROTOBUF) # google::protobuf::MessageLite::ByteSize() is deprecated since # Protobuf 3.4.0. set(ARROW_PROTOBUF_REQUIRED_VERSION "3.4.0") + elseif(ARROW_ENGINE) + # Substrait protobuf files use proto3 syntax + set(ARROW_PROTOBUF_REQUIRED_VERSION "3.0.0") else() set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") endif() diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index a1099b80653..9593baed27a 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -66,6 +66,8 @@ if(MSVC) list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4251") endif() +file(MAKE_DIRECTORY "${SUBSTRAIT_GEN_DIR}/substrait") + set(SUBSTRAIT_PROTO_GEN_ALL) foreach(SUBSTRAIT_PROTO ${SUBSTRAIT_PROTOS}) set(SUBSTRAIT_PROTO_GEN "${SUBSTRAIT_GEN_DIR}/substrait/${SUBSTRAIT_PROTO}.pb") diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 686ef5d5572..d18ae4dcb41 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -443,10 +443,10 @@ struct ScalarToProtoImpl { return Status::OK(); } - template - Status FromBuffer(void (substrait::Expression::Literal::*set)(std::string&&), + template + Status FromBuffer(LiteralSetter&& set_lit, const ScalarWithBufferValue& scalar_with_buffer) { - (lit_->*set)(scalar_with_buffer.value->ToString()); + set_lit(lit_, scalar_with_buffer.value->ToString()); return Status::OK(); } @@ -466,11 +466,18 @@ struct ScalarToProtoImpl { Status Visit(const FloatScalar& s) { return Primitive(&Lit::set_fp32, s); } Status Visit(const DoubleScalar& s) { return Primitive(&Lit::set_fp64, s); } - Status Visit(const StringScalar& s) { return FromBuffer(&Lit::set_string, s); } - Status Visit(const BinaryScalar& s) { return FromBuffer(&Lit::set_binary, s); } + Status Visit(const StringScalar& s) { + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_string(std::move(s)); }, + s); + } + Status Visit(const BinaryScalar& s) { + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_binary(std::move(s)); }, + s); + } Status Visit(const FixedSizeBinaryScalar& s) { - return FromBuffer(&Lit::set_fixed_binary, s); + return FromBuffer( + [](Lit* lit, std::string&& s) { lit->set_fixed_binary(std::move(s)); }, s); } Status Visit(const Date32Scalar& s) { return Primitive(&Lit::set_date, s); } @@ -594,13 +601,14 @@ struct ScalarToProtoImpl { Status Visit(const ExtensionScalar& s) { if (UnwrapUuid(*s.type)) { - return FromBuffer(&Lit::set_uuid, + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_uuid(std::move(s)); }, checked_cast(*s.value)); } if (UnwrapFixedChar(*s.type)) { - return FromBuffer(&Lit::set_fixed_char, - checked_cast(*s.value)); + return FromBuffer( + [](Lit* lit, std::string&& s) { lit->set_fixed_char(std::move(s)); }, + checked_cast(*s.value)); } if (auto length = UnwrapVarChar(*s.type)) { @@ -857,7 +865,8 @@ Result> ToProto(const compute::Expression // catch the special case of calls convertible to a ListElement if (arguments[0]->has_selection() && arguments[0]->selection().has_direct_reference()) { - if (arguments[1]->has_literal() && arguments[1]->literal().has_i32()) { + if (arguments[1]->has_literal() && arguments[1]->literal().literal_type_case() == + substrait::Expression_Literal::kI32) { return MakeListElementReference(std::move(arguments[0]), arguments[1]->literal().i32()); } diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index 8ffbcc005da..5813dcde24c 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -23,6 +23,8 @@ #include "arrow/util/make_unique.h" #include "arrow/util/unreachable.h" +#include + namespace arrow { using internal::checked_cast; diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index ae2244c87f5..4ef19349a8d 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -90,7 +90,8 @@ Result FromProto(const substrait::Rel& rel, std::vector> fragments; for (const auto& item : read.local_files().items()) { - if (!item.has_uri_file()) { + if (item.path_type_case() != + substrait::ReadRel_LocalFiles_FileOrFiles::kUriFile) { return Status::NotImplemented( "substrait::ReadRel::LocalFiles::FileOrFiles with " "path_type other than uri_file"); diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc b/cpp/src/arrow/engine/substrait/type_internal.cc index 49ca1bbfabf..c1dac97b682 100644 --- a/cpp/src/arrow/engine/substrait/type_internal.cc +++ b/cpp/src/arrow/engine/substrait/type_internal.cc @@ -79,8 +79,8 @@ Result FieldsFromProto(int size, const Types& types, std::shared_ptr type; bool nullable; - if (types[i].has_struct_()) { - const auto& struct_ = types[i].struct_(); + if (types.Get(i).has_struct_()) { + const auto& struct_ = types.Get(i).struct_(); ARROW_ASSIGN_OR_RAISE( type, FieldsFromProto(struct_.types_size(), struct_.types(), next_name, ext_set) @@ -88,7 +88,7 @@ Result FieldsFromProto(int size, const Types& types, nullable = IsNullable(struct_); } else { - ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable), FromProto(types[i], ext_set)); + ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable), FromProto(types.Get(i), ext_set)); } fields[i] = field(std::move(name), std::move(type), nullable);