From 9550f0e653b7a1f8de7e26d5137dc761ca2d3c4d Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 12:17:33 +0100 Subject: [PATCH 1/6] Fix Substrait consumer compilation for libprotobuf version 3.0.0 --- cpp/src/arrow/engine/substrait/expression_internal.cc | 8 ++++++-- cpp/src/arrow/engine/substrait/plan_internal.cc | 2 ++ cpp/src/arrow/engine/substrait/relation_internal.cc | 3 ++- cpp/src/arrow/engine/substrait/type_internal.cc | 6 +++--- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 686ef5d5572..3f3346426c9 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -443,8 +443,11 @@ struct ScalarToProtoImpl { return Status::OK(); } + // Note: while std::string&& would be better for the argument + // type, older versions of protobuf don't have this overload + // yet. template - Status FromBuffer(void (substrait::Expression::Literal::*set)(std::string&&), + Status FromBuffer(void (substrait::Expression::Literal::*set)(const std::string&), const ScalarWithBufferValue& scalar_with_buffer) { (lit_->*set)(scalar_with_buffer.value->ToString()); return Status::OK(); @@ -857,7 +860,8 @@ Result> ToProto(const compute::Expression // catch the special case of calls convertible to a ListElement if (arguments[0]->has_selection() && arguments[0]->selection().has_direct_reference()) { - if (arguments[1]->has_literal() && arguments[1]->literal().has_i32()) { + if (arguments[1]->has_literal() && arguments[1]->literal().literal_type_case() == + substrait::Expression_Literal::kI32) { return MakeListElementReference(std::move(arguments[0]), arguments[1]->literal().i32()); } diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index 8ffbcc005da..5813dcde24c 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -23,6 +23,8 @@ #include "arrow/util/make_unique.h" #include "arrow/util/unreachable.h" +#include + namespace arrow { using internal::checked_cast; diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index ae2244c87f5..4ef19349a8d 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -90,7 +90,8 @@ Result FromProto(const substrait::Rel& rel, std::vector> fragments; for (const auto& item : read.local_files().items()) { - if (!item.has_uri_file()) { + if (item.path_type_case() != + substrait::ReadRel_LocalFiles_FileOrFiles::kUriFile) { return Status::NotImplemented( "substrait::ReadRel::LocalFiles::FileOrFiles with " "path_type other than uri_file"); diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc b/cpp/src/arrow/engine/substrait/type_internal.cc index 49ca1bbfabf..c1dac97b682 100644 --- a/cpp/src/arrow/engine/substrait/type_internal.cc +++ b/cpp/src/arrow/engine/substrait/type_internal.cc @@ -79,8 +79,8 @@ Result FieldsFromProto(int size, const Types& types, std::shared_ptr type; bool nullable; - if (types[i].has_struct_()) { - const auto& struct_ = types[i].struct_(); + if (types.Get(i).has_struct_()) { + const auto& struct_ = types.Get(i).struct_(); ARROW_ASSIGN_OR_RAISE( type, FieldsFromProto(struct_.types_size(), struct_.types(), next_name, ext_set) @@ -88,7 +88,7 @@ Result FieldsFromProto(int size, const Types& types, nullable = IsNullable(struct_); } else { - ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable), FromProto(types[i], ext_set)); + ARROW_ASSIGN_OR_RAISE(std::tie(type, nullable), FromProto(types.Get(i), ext_set)); } fields[i] = field(std::move(name), std::move(type), nullable); From ee33fcfc22ee9041e47f834c9395070a9a6ae589 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 12:20:23 +0100 Subject: [PATCH 2/6] Demand at least libprotobuf 3.0.0 when ARROW_ENGINE is enabled --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 32af0a00ad1..5b560591235 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1530,6 +1530,9 @@ if(ARROW_WITH_PROTOBUF) # google::protobuf::MessageLite::ByteSize() is deprecated since # Protobuf 3.4.0. set(ARROW_PROTOBUF_REQUIRED_VERSION "3.4.0") + elseif(ARROW_ENGINE) + # Substrait protobuf files use proto3 syntax + set(ARROW_PROTOBUF_REQUIRED_VERSION "3.0.0") else() set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") endif() From 751fb9d70f6682d169232ccc25ce17a5092ff2dd Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 12:21:16 +0100 Subject: [PATCH 3/6] Allow bundled toolchain versions to be overridden without changing versions.txt --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5b560591235..142f61f2c20 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -359,10 +359,19 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) continue() endif() - # For debugging - message(STATUS "${_VARIABLE_NAME}: ${_VARIABLE_VALUE}") + # Allow values to be overridden to test specific versions during development + if(${_VARIABLE_NAME}) - set(${_VARIABLE_NAME} ${_VARIABLE_VALUE}) + # For debugging + message(STATUS "${_VARIABLE_NAME}: overridden to ${${_VARIABLE_NAME}}") + + else() + + # For debugging + message(STATUS "${_VARIABLE_NAME}: ${_VARIABLE_VALUE}") + + set(${_VARIABLE_NAME} ${_VARIABLE_VALUE}) + endif() endforeach() set(THIRDPARTY_MIRROR_URL "https://apache.jfrog.io/artifactory/arrow/thirdparty/7.0.0") From 7ffd90f08ab06c3462db55507d8a560fa4c60ba9 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 15:07:55 +0100 Subject: [PATCH 4/6] Ensure string-like data is moved if protobuf version --- .../engine/substrait/expression_internal.cc | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 3f3346426c9..d18ae4dcb41 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -443,13 +443,10 @@ struct ScalarToProtoImpl { return Status::OK(); } - // Note: while std::string&& would be better for the argument - // type, older versions of protobuf don't have this overload - // yet. - template - Status FromBuffer(void (substrait::Expression::Literal::*set)(const std::string&), + template + Status FromBuffer(LiteralSetter&& set_lit, const ScalarWithBufferValue& scalar_with_buffer) { - (lit_->*set)(scalar_with_buffer.value->ToString()); + set_lit(lit_, scalar_with_buffer.value->ToString()); return Status::OK(); } @@ -469,11 +466,18 @@ struct ScalarToProtoImpl { Status Visit(const FloatScalar& s) { return Primitive(&Lit::set_fp32, s); } Status Visit(const DoubleScalar& s) { return Primitive(&Lit::set_fp64, s); } - Status Visit(const StringScalar& s) { return FromBuffer(&Lit::set_string, s); } - Status Visit(const BinaryScalar& s) { return FromBuffer(&Lit::set_binary, s); } + Status Visit(const StringScalar& s) { + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_string(std::move(s)); }, + s); + } + Status Visit(const BinaryScalar& s) { + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_binary(std::move(s)); }, + s); + } Status Visit(const FixedSizeBinaryScalar& s) { - return FromBuffer(&Lit::set_fixed_binary, s); + return FromBuffer( + [](Lit* lit, std::string&& s) { lit->set_fixed_binary(std::move(s)); }, s); } Status Visit(const Date32Scalar& s) { return Primitive(&Lit::set_date, s); } @@ -597,13 +601,14 @@ struct ScalarToProtoImpl { Status Visit(const ExtensionScalar& s) { if (UnwrapUuid(*s.type)) { - return FromBuffer(&Lit::set_uuid, + return FromBuffer([](Lit* lit, std::string&& s) { lit->set_uuid(std::move(s)); }, checked_cast(*s.value)); } if (UnwrapFixedChar(*s.type)) { - return FromBuffer(&Lit::set_fixed_char, - checked_cast(*s.value)); + return FromBuffer( + [](Lit* lit, std::string&& s) { lit->set_fixed_char(std::move(s)); }, + checked_cast(*s.value)); } if (auto length = UnwrapVarChar(*s.type)) { From 2144b75a660a3ce9ca21199d69b2769724951c20 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 18:15:14 +0100 Subject: [PATCH 5/6] Attempt to solve inconsistent build failure due to non-existant output directory --- cpp/src/arrow/engine/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index a1099b80653..9593baed27a 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -66,6 +66,8 @@ if(MSVC) list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4251") endif() +file(MAKE_DIRECTORY "${SUBSTRAIT_GEN_DIR}/substrait") + set(SUBSTRAIT_PROTO_GEN_ALL) foreach(SUBSTRAIT_PROTO ${SUBSTRAIT_PROTOS}) set(SUBSTRAIT_PROTO_GEN "${SUBSTRAIT_GEN_DIR}/substrait/${SUBSTRAIT_PROTO}.pb") From f3b23a055694288ea73cbd77bafaa68855f69dc6 Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Thu, 17 Feb 2022 18:22:26 +0100 Subject: [PATCH 6/6] Revert "Allow bundled toolchain versions to be overridden without changing versions.txt" This reverts commit 751fb9d70f6682d169232ccc25ce17a5092ff2dd. --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 142f61f2c20..5b560591235 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -359,19 +359,10 @@ foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) continue() endif() - # Allow values to be overridden to test specific versions during development - if(${_VARIABLE_NAME}) + # For debugging + message(STATUS "${_VARIABLE_NAME}: ${_VARIABLE_VALUE}") - # For debugging - message(STATUS "${_VARIABLE_NAME}: overridden to ${${_VARIABLE_NAME}}") - - else() - - # For debugging - message(STATUS "${_VARIABLE_NAME}: ${_VARIABLE_VALUE}") - - set(${_VARIABLE_NAME} ${_VARIABLE_VALUE}) - endif() + set(${_VARIABLE_NAME} ${_VARIABLE_VALUE}) endforeach() set(THIRDPARTY_MIRROR_URL "https://apache.jfrog.io/artifactory/arrow/thirdparty/7.0.0")