From 0a86c38c94663c6473c151662fc720bda79002ef Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Sat, 28 Oct 2023 18:18:00 +0800 Subject: [PATCH 1/9] Add initial support for extending gandiva stub functions. --- cpp/src/gandiva/CMakeLists.txt | 1 + cpp/src/gandiva/cast_time.cc | 3 +- cpp/src/gandiva/context_helper.cc | 3 +- cpp/src/gandiva/decimal_xlarge.cc | 3 +- cpp/src/gandiva/engine.cc | 9 +- cpp/src/gandiva/engine.h | 2 +- cpp/src/gandiva/exported_funcs.h | 26 ++++-- cpp/src/gandiva/exported_funcs_registry.cc | 5 +- cpp/src/gandiva/exported_funcs_registry.h | 2 +- cpp/src/gandiva/external_stub_functions.cc | 87 ++++++++++++++++++++ cpp/src/gandiva/function_registry.cc | 11 +++ cpp/src/gandiva/function_registry.h | 7 ++ cpp/src/gandiva/gdv_function_stubs.cc | 3 +- cpp/src/gandiva/gdv_hash_function_stubs.cc | 3 +- cpp/src/gandiva/gdv_string_function_stubs.cc | 5 +- cpp/src/gandiva/tests/projector_test.cc | 24 ++++++ cpp/src/gandiva/tests/test_util.cc | 17 ++++ cpp/src/gandiva/tests/test_util.h | 8 ++ 18 files changed, 199 insertions(+), 20 deletions(-) create mode 100644 cpp/src/gandiva/external_stub_functions.cc diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 3448d516768..180af6d9cb5 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -62,6 +62,7 @@ set(SRC_FILES expression_registry.cc exported_funcs_registry.cc exported_funcs.cc + external_stub_functions.cc filter.cc function_ir_builder.cc function_registry.cc diff --git a/cpp/src/gandiva/cast_time.cc b/cpp/src/gandiva/cast_time.cc index 843ce01f89d..eeb2ea3fdd8 100644 --- a/cpp/src/gandiva/cast_time.cc +++ b/cpp/src/gandiva/cast_time.cc @@ -29,7 +29,7 @@ namespace gandiva { -void ExportedTimeFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedTimeFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -42,6 +42,7 @@ void ExportedTimeFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("gdv_fn_time_with_zone", types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_fn_time_with_zone)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/context_helper.cc b/cpp/src/gandiva/context_helper.cc index 224bfd8f56c..03bbe1b7a67 100644 --- a/cpp/src/gandiva/context_helper.cc +++ b/cpp/src/gandiva/context_helper.cc @@ -25,7 +25,7 @@ namespace gandiva { -void ExportedContextFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedContextFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -50,6 +50,7 @@ void ExportedContextFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("gdv_fn_context_arena_reset", types->void_type(), args, reinterpret_cast(gdv_fn_context_arena_reset)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/decimal_xlarge.cc b/cpp/src/gandiva/decimal_xlarge.cc index caebd8b09e6..21212422f3d 100644 --- a/cpp/src/gandiva/decimal_xlarge.cc +++ b/cpp/src/gandiva/decimal_xlarge.cc @@ -38,7 +38,7 @@ namespace gandiva { -void ExportedDecimalFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedDecimalFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -93,6 +93,7 @@ void ExportedDecimalFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("gdv_xlarge_compare", types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_xlarge_compare)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 5ae1d768761..b03513386ab 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -146,8 +146,13 @@ Engine::Engine(const std::shared_ptr& conf, Status Engine::Init() { std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); + bool result = ExportedFuncsRegistry::Register( + std::make_shared(function_registry_)); + if (!result) { + return Status::CodeGenError("Failed to register external stub functions"); + } // Add mappings for global functions that can be accessed from LLVM/IR module. - AddGlobalMappings(); + ARROW_RETURN_NOT_OK(AddGlobalMappings()); return Status::OK(); } @@ -447,7 +452,7 @@ void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_ty execution_engine_->addGlobalMapping(fn, function_ptr); } -void Engine::AddGlobalMappings() { ExportedFuncsRegistry::AddMappings(this); } +arrow::Status Engine::AddGlobalMappings() { return ExportedFuncsRegistry::AddMappings(this); } std::string Engine::DumpIR() { std::string ir; diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 566977dc4ad..df2d8b36d92 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -97,7 +97,7 @@ class GANDIVA_EXPORT Engine { Status LoadExternalPreCompiledIR(); // Create and add mappings for cpp functions that can be accessed from LLVM. - void AddGlobalMappings(); + arrow::Status AddGlobalMappings(); // Remove unused functions to reduce compile time. Status RemoveUnusedFunctions(); diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h index 82aa020a210..e6eb6df2c8f 100644 --- a/cpp/src/gandiva/exported_funcs.h +++ b/cpp/src/gandiva/exported_funcs.h @@ -18,6 +18,7 @@ #pragma once #include +#include "gandiva/function_registry.h" #include "gandiva/visibility.h" namespace gandiva { @@ -29,37 +30,48 @@ class ExportedFuncsBase { public: virtual ~ExportedFuncsBase() = default; - virtual void AddMappings(Engine* engine) const = 0; + virtual arrow::Status AddMappings(Engine* engine) const = 0; }; // Class for exporting Stub functions class ExportedStubFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; }; // Class for exporting Context functions class ExportedContextFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; }; // Class for exporting Time functions class ExportedTimeFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; }; // Class for exporting Decimal functions class ExportedDecimalFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; }; // Class for exporting String functions class ExportedStringFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; }; // Class for exporting Hash functions class ExportedHashFunctions : public ExportedFuncsBase { - void AddMappings(Engine* engine) const override; + arrow::Status AddMappings(Engine* engine) const override; +}; + +class ExternalStubFunctions : public ExportedFuncsBase { + public: + ExternalStubFunctions(std::shared_ptr function_registry) + : function_registry_(std::move(function_registry)) {} + + arrow::Status AddMappings(Engine* engine) const override; + + private: + std::shared_ptr function_registry_; }; GANDIVA_EXPORT void RegisterExportedFuncs(); diff --git a/cpp/src/gandiva/exported_funcs_registry.cc b/cpp/src/gandiva/exported_funcs_registry.cc index 2c928a7a2a4..137d29eefbe 100644 --- a/cpp/src/gandiva/exported_funcs_registry.cc +++ b/cpp/src/gandiva/exported_funcs_registry.cc @@ -21,10 +21,11 @@ namespace gandiva { -void ExportedFuncsRegistry::AddMappings(Engine* engine) { +arrow::Status ExportedFuncsRegistry::AddMappings(Engine* engine) { for (const auto& entry : *registered()) { - entry->AddMappings(engine); + ARROW_RETURN_NOT_OK(entry->AddMappings(engine)); } + return arrow::Status::OK(); } const ExportedFuncsRegistry::list_type& ExportedFuncsRegistry::Registered() { diff --git a/cpp/src/gandiva/exported_funcs_registry.h b/cpp/src/gandiva/exported_funcs_registry.h index 08c45aec6a1..a34308bb960 100644 --- a/cpp/src/gandiva/exported_funcs_registry.h +++ b/cpp/src/gandiva/exported_funcs_registry.h @@ -34,7 +34,7 @@ class GANDIVA_EXPORT ExportedFuncsRegistry { using list_type = std::vector>; // Add functions from all the registered classes to the engine. - static void AddMappings(Engine* engine); + static arrow::Status AddMappings(Engine* engine); static bool Register(std::shared_ptr entry) { registered()->emplace_back(std::move(entry)); diff --git a/cpp/src/gandiva/external_stub_functions.cc b/cpp/src/gandiva/external_stub_functions.cc new file mode 100644 index 00000000000..693dd61aff9 --- /dev/null +++ b/cpp/src/gandiva/external_stub_functions.cc @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License + +#include "llvm/IR/Type.h" + +#include "gandiva/engine.h" +#include "gandiva/exported_funcs.h" + +namespace gandiva { +static arrow::Result AsLLVMType(const DataTypePtr& from_type, + LLVMTypes* types) { + switch (from_type->id()) { + case arrow::Type::BOOL: + return types->i1_type(); + case arrow::Type::INT8: + case arrow::Type::UINT8: + return types->i8_type(); + case arrow::Type::INT16: + case arrow::Type::UINT16: + return types->i16_type(); + case arrow::Type::INT32: + case arrow::Type::UINT32: + return types->i32_type(); + case arrow::Type::INT64: + case arrow::Type::UINT64: + return types->i64_type(); + case arrow::Type::FLOAT: + return types->float_type(); + case arrow::Type::DOUBLE: + return types->double_type(); + default: + return Status::NotImplemented("Unsupported arrow data type: " + + from_type->ToString()); + } +} + +arrow::Status ExternalStubFunctions::AddMappings(Engine* engine) const { + auto external_stub_funcs = function_registry_->GetStubFunctions(); + auto types = engine->types(); + for (auto& [func, func_ptr] : external_stub_funcs) { + for (auto& sig : func.signatures()) { + std::vector args; + args.reserve(sig.param_types().size()); + if (func.NeedsContext()) { + args.emplace_back(types->i64_type()); + } + if (func.NeedsFunctionHolder()) { + args.emplace_back(types->i64_type()); + } + for (auto const& arg : sig.param_types()) { + if (arg->id() == arrow::Type::STRING) { + args.emplace_back(types->i8_ptr_type()); + args.emplace_back(types->i32_type()); + } else { + ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); + args.emplace_back(arg_llvm_type); + } + } + llvm::Type* ret_llvm_type; + if (sig.ret_type()->id() == arrow::Type::STRING) { + // for string output, the last arg is the output length + args.emplace_back(types->i32_ptr_type()); + ret_llvm_type = types->i8_ptr_type(); + } else { + ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); + } + auto return_type = AsLLVMType(sig.ret_type(), types); + engine->AddGlobalMappingForFunc(func.pc_name(), ret_llvm_type, args, func_ptr); + } + } + return arrow::Status::OK(); +} +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 5d676dfa8df..db0077a9532 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -109,11 +109,22 @@ arrow::Status FunctionRegistry::Register(const std::vector& func return Status::OK(); } +arrow::Status FunctionRegistry::Register(NativeFunction func, void* stub_function_ptr) { + stub_functions_.emplace_back(func, stub_function_ptr); + ARROW_RETURN_NOT_OK(FunctionRegistry::Add(std::move(func))); + return Status::OK(); +} + const std::vector>& FunctionRegistry::GetBitcodeBuffers() const { return bitcode_memory_buffers_; } +const std::vector>& FunctionRegistry::GetStubFunctions() + const { + return stub_functions_; +} + arrow::Result> MakeDefaultFunctionRegistry() { auto registry = std::make_shared(); for (auto const& funcs : diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 01984961dc9..60bf5be179b 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -52,9 +52,15 @@ class GANDIVA_EXPORT FunctionRegistry { arrow::Status Register(const std::vector& funcs, std::shared_ptr bitcode_buffer); + /// \brief register a stub function into the function registry + arrow::Status Register(NativeFunction func, void* stub_function_ptr); + /// \brief get a list of bitcode memory buffers saved in the registry const std::vector>& GetBitcodeBuffers() const; + /// \brief get a list of stub functions saved in the registry + const std::vector>& GetStubFunctions() const; + iterator begin() const; iterator end() const; iterator back() const; @@ -65,6 +71,7 @@ class GANDIVA_EXPORT FunctionRegistry { std::vector pc_registry_; SignatureMap pc_registry_map_; std::vector> bitcode_memory_buffers_; + std::vector> stub_functions_; Status Add(NativeFunction func); }; diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 67d39aeba55..0ad3c1738e8 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -822,7 +822,7 @@ const char* gdv_mask_show_last_n_utf8_int32(int64_t context, const char* data, namespace gandiva { -void ExportedStubFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -1268,5 +1268,6 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("mask_utf8", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(mask_utf8)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc b/cpp/src/gandiva/gdv_hash_function_stubs.cc index 018b0fbb709..aac70a06be6 100644 --- a/cpp/src/gandiva/gdv_hash_function_stubs.cc +++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc @@ -216,7 +216,7 @@ const char* gdv_fn_sha1_decimal128(int64_t context, int64_t x_high, uint64_t x_l namespace gandiva { -void ExportedHashFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedHashFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -1041,5 +1041,6 @@ void ExportedHashFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("gdv_fn_md5_decimal128", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_md5_decimal128)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc index 3bfb297af14..51b1f3b8007 100644 --- a/cpp/src/gandiva/gdv_string_function_stubs.cc +++ b/cpp/src/gandiva/gdv_string_function_stubs.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//#pragma once +// #pragma once #include "gandiva/gdv_function_stubs.h" @@ -761,7 +761,7 @@ const char* translate_utf8_utf8_utf8(int64_t context, const char* in, int32_t in namespace gandiva { -void ExportedStringFunctions::AddMappings(Engine* engine) const { +arrow::Status ExportedStringFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -988,5 +988,6 @@ void ExportedStringFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc("translate_utf8_utf8_utf8", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(translate_utf8_utf8_utf8)); + return arrow::Status::OK(); } } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 38566fb408a..b64a58fa7ac 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3608,4 +3608,28 @@ TEST_F(TestProjector, TestExtendedFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } +TEST_F(TestProjector, TestExtendedStubFunctions) { + auto in_field = field("in", arrow::int32()); + auto schema = arrow::schema({in_field}); + auto out_field = field("out", arrow::int64()); + auto multiply = + TreeExprBuilder::MakeExpression("multiply_by_three", {in_field}, out_field); + + std::shared_ptr projector; + auto external_registry = std::make_shared(); + auto config_with_func_registry = + TestConfigurationWithExternalStubFunctionRegistry(std::move(external_registry)); + ARROW_EXPECT_OK( + Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); + + int num_records = 4; + auto array = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true}); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array}); + auto out = MakeArrowArrayInt64({3, 6, 9, 12}, {true, true, true, true}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 4a0a15c7223..c0aed0c788e 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -43,6 +43,13 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } +NativeFunction GetTestExternalStubFunction() { + NativeFunction multiply_by_three_func( + "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), + ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); + return multiply_by_three_func; +} + std::shared_ptr TestConfigurationWithFunctionRegistry( std::shared_ptr registry) { ARROW_EXPECT_OK( @@ -50,4 +57,14 @@ std::shared_ptr TestConfigurationWithFunctionRegistry( auto external_func_config = ConfigurationBuilder().build(std::move(registry)); return external_func_config; } + +int64_t multiply_by_three(int32_t in) { return in * 3; } + +std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( + std::shared_ptr registry) { + ARROW_EXPECT_OK(registry->Register(GetTestExternalStubFunction(), + reinterpret_cast(multiply_by_three))); + auto external_func_config = ConfigurationBuilder().build(std::move(registry)); + return external_func_config; +} } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index e431e53096c..bca878cbb94 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -101,7 +101,15 @@ std::shared_ptr TestConfiguration(); std::shared_ptr TestConfigurationWithFunctionRegistry( std::shared_ptr registry); +std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( + std::shared_ptr registry); + std::string GetTestFunctionLLVMIRPath(); NativeFunction GetTestExternalFunction(); +NativeFunction GetTestExternalStubFunction(); + +extern "C" { +int64_t multiply_by_three(int32_t in); +} } // namespace gandiva From 2eb023796bab492d8063cb473b2d2f8a799a11c3 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Sat, 28 Oct 2023 21:12:15 +0800 Subject: [PATCH 2/9] Support registering gandiva functions that requires function holder as parameter. --- cpp/src/gandiva/CMakeLists.txt | 1 + cpp/src/gandiva/expr_decomposer.cc | 8 +- .../gandiva/function_holder_maker_registry.cc | 75 +++++++++++++++++ .../gandiva/function_holder_maker_registry.h | 52 ++++++++++++ cpp/src/gandiva/function_holder_registry.h | 80 ------------------- cpp/src/gandiva/function_registry.cc | 15 +++- cpp/src/gandiva/function_registry.h | 18 ++++- cpp/src/gandiva/tests/projector_test.cc | 28 +++++++ cpp/src/gandiva/tests/test_util.cc | 70 +++++++++++++++- cpp/src/gandiva/tests/test_util.h | 8 +- 10 files changed, 263 insertions(+), 92 deletions(-) create mode 100644 cpp/src/gandiva/function_holder_maker_registry.cc create mode 100644 cpp/src/gandiva/function_holder_maker_registry.h delete mode 100644 cpp/src/gandiva/function_holder_registry.h diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 180af6d9cb5..39a4e84431e 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -65,6 +65,7 @@ set(SRC_FILES external_stub_functions.cc filter.cc function_ir_builder.cc + function_holder_maker_registry.cc function_registry.cc function_registry_arithmetic.cc function_registry_datetime.cc diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index 957d9d046bd..42566ca0351 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -25,11 +25,12 @@ #include "gandiva/annotator.h" #include "gandiva/dex.h" -#include "gandiva/function_holder_registry.h" +#include "gandiva/function_holder_maker_registry.h" #include "gandiva/function_registry.h" #include "gandiva/function_signature.h" #include "gandiva/in_holder.h" #include "gandiva/node.h" +#include "gandiva/regex_functions_holder.h" namespace gandiva { @@ -81,9 +82,10 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) { std::shared_ptr holder; int holder_idx = -1; if (native_function->NeedsFunctionHolder()) { - auto status = FunctionHolderRegistry::Make(desc->name(), node, &holder); + auto function_holder_maker_registry = registry_.GetFunctionHolderMakerRegistry(); + ARROW_ASSIGN_OR_RAISE(holder, + function_holder_maker_registry.Make(desc->name(), node)); holder_idx = annotator_.AddHolderPointer(holder.get()); - ARROW_RETURN_NOT_OK(status); } if (native_function->result_nullable_type() == kResultNullIfNull) { diff --git a/cpp/src/gandiva/function_holder_maker_registry.cc b/cpp/src/gandiva/function_holder_maker_registry.cc new file mode 100644 index 00000000000..169147697d7 --- /dev/null +++ b/cpp/src/gandiva/function_holder_maker_registry.cc @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_holder_maker_registry.h" + +#include + +#include "gandiva/function_holder.h" +#include "gandiva/interval_holder.h" +#include "gandiva/random_generator_holder.h" +#include "gandiva/regex_functions_holder.h" +#include "gandiva/to_date_holder.h" + +namespace gandiva { + +FunctionHolderMakerRegistry::FunctionHolderMakerRegistry() + : function_holder_makers_(DefaultHolderMakers()) {} + +static std::string to_lower(const std::string& str) { + std::string data = str; + std::transform(data.begin(), data.end(), data.begin(), + [](unsigned char c) { return std::tolower(c); }); + return data; +} +arrow::Status FunctionHolderMakerRegistry::Register(const std::string& name, + FunctionHolderMaker holder_maker) { + function_holder_makers_.emplace(to_lower(name), std::move(holder_maker)); + return arrow::Status::OK(); +} + +template +static arrow::Result HolderMaker(const FunctionNode& node) { + std::shared_ptr derived_instance; + ARROW_RETURN_NOT_OK(HolderType::Make(node, &derived_instance)); + return derived_instance; +} + +arrow::Result FunctionHolderMakerRegistry::Make( + const std::string& name, const FunctionNode& node) { + auto lowered_name = to_lower(name); + auto found = function_holder_makers_.find(lowered_name); + if (found == function_holder_makers_.end()) { + return Status::Invalid("function holder not registered for function " + name); + } + + return found->second(node); +} + +FunctionHolderMakerRegistry::MakerMap FunctionHolderMakerRegistry::DefaultHolderMakers() { + static const MakerMap maker_map = { + {"like", HolderMaker}, + {"to_date", HolderMaker}, + {"random", HolderMaker}, + {"rand", HolderMaker}, + {"regexp_replace", HolderMaker}, + {"regexp_extract", HolderMaker}, + {"castintervalday", HolderMaker}, + {"castintervalyear", HolderMaker}}; + return maker_map; +} +} // namespace gandiva diff --git a/cpp/src/gandiva/function_holder_maker_registry.h b/cpp/src/gandiva/function_holder_maker_registry.h new file mode 100644 index 00000000000..f215a4852aa --- /dev/null +++ b/cpp/src/gandiva/function_holder_maker_registry.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/status.h" +#include "gandiva/function_holder.h" +#include "gandiva/node.h" + +namespace gandiva { + +/// registry of function holder makers +class FunctionHolderMakerRegistry { + public: + using FunctionHolderMaker = + std::function(const FunctionNode&)>; + + FunctionHolderMakerRegistry(); + + arrow::Status Register(const std::string& name, FunctionHolderMaker holder_maker); + + /// \brief lookup a function holder maker using the given function name, + /// and make a FunctionHolderPtr using the found holder maker and the given FunctionNode + arrow::Result Make(const std::string& name, + const FunctionNode& node); + + private: + using MakerMap = std::unordered_map; + + MakerMap function_holder_makers_; + static MakerMap DefaultHolderMakers(); +}; + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h deleted file mode 100644 index 7220f0d9d0d..00000000000 --- a/cpp/src/gandiva/function_holder_registry.h +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include "arrow/status.h" -#include "gandiva/function_holder.h" -#include "gandiva/interval_holder.h" -#include "gandiva/node.h" -#include "gandiva/random_generator_holder.h" -#include "gandiva/regex_functions_holder.h" -#include "gandiva/to_date_holder.h" - -namespace gandiva { - -#define LAMBDA_MAKER(derived) \ - [](const FunctionNode& node, FunctionHolderPtr* holder) { \ - std::shared_ptr derived_instance; \ - auto status = derived::Make(node, &derived_instance); \ - if (status.ok()) { \ - *holder = derived_instance; \ - } \ - return status; \ - } - -/// Static registry of function holders. -class FunctionHolderRegistry { - public: - using maker_type = std::function; - using map_type = std::unordered_map; - - static Status Make(const std::string& name, const FunctionNode& node, - FunctionHolderPtr* holder) { - std::string data = name; - std::transform(data.begin(), data.end(), data.begin(), - [](unsigned char c) { return std::tolower(c); }); - - auto found = makers().find(data); - if (found == makers().end()) { - return Status::Invalid("function holder not registered for function " + name); - } - - return found->second(node, holder); - } - - private: - static map_type& makers() { - static map_type maker_map = {{"like", LAMBDA_MAKER(LikeHolder)}, - {"ilike", LAMBDA_MAKER(LikeHolder)}, - {"to_date", LAMBDA_MAKER(ToDateHolder)}, - {"random", LAMBDA_MAKER(RandomGeneratorHolder)}, - {"rand", LAMBDA_MAKER(RandomGeneratorHolder)}, - {"regexp_replace", LAMBDA_MAKER(ReplaceHolder)}, - {"regexp_extract", LAMBDA_MAKER(ExtractHolder)}, - {"castintervalday", LAMBDA_MAKER(IntervalDaysHolder)}, - {"castintervalyear", LAMBDA_MAKER(IntervalYearsHolder)}}; - return maker_map; - } -}; - -} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index db0077a9532..4f78f8e75ef 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -109,7 +109,15 @@ arrow::Status FunctionRegistry::Register(const std::vector& func return Status::OK(); } -arrow::Status FunctionRegistry::Register(NativeFunction func, void* stub_function_ptr) { +arrow::Status FunctionRegistry::Register( + NativeFunction func, void* stub_function_ptr, + std::optional function_holder_maker) { + if (function_holder_maker.has_value()) { + // all signatures should have the same base name, use the first signature's base name + auto const& func_base_name = func.signatures().begin()->base_name(); + ARROW_RETURN_NOT_OK(holder_maker_registry_.Register( + func_base_name, std::move(function_holder_maker.value()))); + } stub_functions_.emplace_back(func, stub_function_ptr); ARROW_RETURN_NOT_OK(FunctionRegistry::Add(std::move(func))); return Status::OK(); @@ -125,6 +133,11 @@ const std::vector>& FunctionRegistry::GetStubFu return stub_functions_; } +const FunctionHolderMakerRegistry& FunctionRegistry::GetFunctionHolderMakerRegistry() + const { + return holder_maker_registry_; +} + arrow::Result> MakeDefaultFunctionRegistry() { auto registry = std::make_shared(); for (auto const& funcs : diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 60bf5be179b..ec3661d7416 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -18,11 +18,14 @@ #pragma once #include +#include #include #include #include "arrow/buffer.h" #include "arrow/status.h" +#include "gandiva/function_holder.h" +#include "gandiva/function_holder_maker_registry.h" #include "gandiva/function_registry_common.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/native_function.h" @@ -34,6 +37,9 @@ namespace gandiva { class GANDIVA_EXPORT FunctionRegistry { public: using iterator = const NativeFunction*; + using FunctionHolderMaker = + std::function>( + const FunctionNode& function_node)>; FunctionRegistry(); FunctionRegistry(const FunctionRegistry&) = delete; @@ -53,7 +59,14 @@ class GANDIVA_EXPORT FunctionRegistry { std::shared_ptr bitcode_buffer); /// \brief register a stub function into the function registry - arrow::Status Register(NativeFunction func, void* stub_function_ptr); + /// @param func the registered function's metadata + /// @param stub_function_ptr the function pointer to the + /// registered function's implementation + /// @param function_holder_maker this will be used as the function holder if the + /// function requires a function holder + arrow::Status Register( + NativeFunction func, void* stub_function_ptr, + std::optional function_holder_maker = std::nullopt); /// \brief get a list of bitcode memory buffers saved in the registry const std::vector>& GetBitcodeBuffers() const; @@ -61,6 +74,8 @@ class GANDIVA_EXPORT FunctionRegistry { /// \brief get a list of stub functions saved in the registry const std::vector>& GetStubFunctions() const; + const FunctionHolderMakerRegistry& GetFunctionHolderMakerRegistry() const; + iterator begin() const; iterator end() const; iterator back() const; @@ -72,6 +87,7 @@ class GANDIVA_EXPORT FunctionRegistry { SignatureMap pc_registry_map_; std::vector> bitcode_memory_buffers_; std::vector> stub_functions_; + FunctionHolderMakerRegistry holder_maker_registry_; Status Add(NativeFunction func); }; diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index b64a58fa7ac..eb8b03f4315 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3632,4 +3632,32 @@ TEST_F(TestProjector, TestExtendedStubFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } +TEST_F(TestProjector, TestExtendedStubFunctionsWithFunctionHolder) { + auto multiple = TreeExprBuilder::MakeLiteral(5); + auto in_field = field("in", arrow::int32()); + auto schema = arrow::schema({in_field}); + auto out_field = field("out", arrow::int64()); + + auto in_node = TreeExprBuilder::MakeField(in_field); + auto multiply_by_n_func = + TreeExprBuilder::MakeFunction("multiply_by_n", {in_node, multiple}, arrow::int64()); + auto multiply = TreeExprBuilder::MakeExpression(multiply_by_n_func, out_field); + + std::shared_ptr projector; + auto external_registry = std::make_shared(); + auto config_with_func_registry = + TestConfigurationWithFunctionHolderRegistry(std::move(external_registry)); + ARROW_EXPECT_OK( + Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); + + int num_records = 4; + auto array = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true}); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array}); + auto out = MakeArrowArrayInt64({5, 10, 15, 20}, {true, true, true, true}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index c0aed0c788e..59a0128a909 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -17,8 +17,11 @@ #include "gandiva/tests/test_util.h" +#include + #include "arrow/util/io_util.h" #include "arrow/util/logging.h" +#include "gandiva/function_holder.h" namespace gandiva { std::shared_ptr TestConfiguration() { @@ -43,13 +46,22 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } -NativeFunction GetTestExternalStubFunction() { +static NativeFunction GetTestExternalStubFunction() { NativeFunction multiply_by_three_func( "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); return multiply_by_three_func; } +static NativeFunction GetTestFunctionWithFunctionHolder() { + // the 2nd parameter is expected to be an int32 literal + NativeFunction multiply_by_n_func("multiply_by_n", {}, {arrow::int32(), arrow::int32()}, + arrow::int64(), ResultNullableType::kResultNullIfNull, + "multiply_by_n_int32_int32", + NativeFunction::kNeedsFunctionHolder); + return multiply_by_n_func; +} + std::shared_ptr TestConfigurationWithFunctionRegistry( std::shared_ptr registry) { ARROW_EXPECT_OK( @@ -58,7 +70,48 @@ std::shared_ptr TestConfigurationWithFunctionRegistry( return external_func_config; } -int64_t multiply_by_three(int32_t in) { return in * 3; } +class MultiplyHolder : public FunctionHolder { + public: + MultiplyHolder(int32_t num) : num_(num){}; + + static Status Make(const FunctionNode& node, std::shared_ptr* holder) { + ARROW_RETURN_IF(node.children().size() != 2, + Status::Invalid("'multiply_by_n' function requires two parameters")); + + auto literal = dynamic_cast(node.children().at(1).get()); + ARROW_RETURN_IF( + literal == nullptr, + Status::Invalid( + "'multiply_by_n' function requires a literal as the 2nd parameter")); + + auto literal_type = literal->return_type()->id(); + ARROW_RETURN_IF( + literal_type != arrow::Type::INT32, + Status::Invalid( + "'multiply_by_n' function requires an int32 literal as the 2nd parameter")); + + *holder = std::make_shared( + literal->is_null() ? 0 : std::get(literal->holder())); + return Status::OK(); + } + + int32_t operator()() { return num_; } + + private: + int32_t num_; +}; + +extern "C" { +// this function is used as an external stub function for testing so it has to be declared +// with extern C +static int64_t multiply_by_three(int32_t value) { return value * 3; } + +// this function requires a function holder +static int64_t multiply_by_n(int64_t holder_ptr, int32_t value) { + MultiplyHolder* holder = reinterpret_cast(holder_ptr); + return value * (*holder)(); +} +} std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( std::shared_ptr registry) { @@ -67,4 +120,17 @@ std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry auto external_func_config = ConfigurationBuilder().build(std::move(registry)); return external_func_config; } + +std::shared_ptr TestConfigurationWithFunctionHolderRegistry( + std::shared_ptr registry) { + ARROW_EXPECT_OK(registry->Register( + GetTestFunctionWithFunctionHolder(), reinterpret_cast(multiply_by_n), + [](const FunctionNode& node) -> arrow::Result { + std::shared_ptr derived_instance; + ARROW_RETURN_NOT_OK(MultiplyHolder::Make(node, &derived_instance)); + return derived_instance; + })); + auto external_func_config = ConfigurationBuilder().build(std::move(registry)); + return external_func_config; +} } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index bca878cbb94..5bc5bc3cdf2 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -104,12 +104,10 @@ std::shared_ptr TestConfigurationWithFunctionRegistry( std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( std::shared_ptr registry); +std::shared_ptr TestConfigurationWithFunctionHolderRegistry( + std::shared_ptr registry); + std::string GetTestFunctionLLVMIRPath(); NativeFunction GetTestExternalFunction(); -NativeFunction GetTestExternalStubFunction(); - -extern "C" { -int64_t multiply_by_three(int32_t in); -} } // namespace gandiva From 23ae3b57d1d9acc3a53a91ec4901eb59d650ba05 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Sun, 5 Nov 2023 23:23:14 +0800 Subject: [PATCH 3/9] Verify the external gandiva function that needs context to alloc memory can work. --- cpp/src/gandiva/external_stub_functions.cc | 62 +++++++++-------- cpp/src/gandiva/llvm_generator_test.cc | 2 +- cpp/src/gandiva/tests/projector_test.cc | 30 +++++++- cpp/src/gandiva/tests/test_util.cc | 80 ++++++++++++++++------ cpp/src/gandiva/tests/test_util.h | 19 ++++- 5 files changed, 139 insertions(+), 54 deletions(-) diff --git a/cpp/src/gandiva/external_stub_functions.cc b/cpp/src/gandiva/external_stub_functions.cc index 693dd61aff9..669b97669f8 100644 --- a/cpp/src/gandiva/external_stub_functions.cc +++ b/cpp/src/gandiva/external_stub_functions.cc @@ -48,37 +48,45 @@ static arrow::Result AsLLVMType(const DataTypePtr& from_type, } } +// map from a NativeFunction's signature to the corresponding LLVM signature +static arrow::Result, llvm::Type*>> MapToLLVMSignature( + const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { + std::vector args; + args.reserve(sig.param_types().size()); + if (func.NeedsContext()) { + args.emplace_back(types->i64_type()); + } + if (func.NeedsFunctionHolder()) { + args.emplace_back(types->i64_type()); + } + for (auto const& arg : sig.param_types()) { + if (arg->id() == arrow::Type::STRING) { + args.emplace_back(types->i8_ptr_type()); + args.emplace_back(types->i32_type()); + } else { + ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); + args.emplace_back(arg_llvm_type); + } + } + llvm::Type* ret_llvm_type; + if (sig.ret_type()->id() == arrow::Type::STRING) { + // for string output, the last arg is the output length + args.emplace_back(types->i32_ptr_type()); + ret_llvm_type = types->i8_ptr_type(); + } else { + ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); + } + auto return_type = AsLLVMType(sig.ret_type(), types); + return std::make_pair(args, ret_llvm_type); +} + arrow::Status ExternalStubFunctions::AddMappings(Engine* engine) const { auto external_stub_funcs = function_registry_->GetStubFunctions(); auto types = engine->types(); for (auto& [func, func_ptr] : external_stub_funcs) { - for (auto& sig : func.signatures()) { - std::vector args; - args.reserve(sig.param_types().size()); - if (func.NeedsContext()) { - args.emplace_back(types->i64_type()); - } - if (func.NeedsFunctionHolder()) { - args.emplace_back(types->i64_type()); - } - for (auto const& arg : sig.param_types()) { - if (arg->id() == arrow::Type::STRING) { - args.emplace_back(types->i8_ptr_type()); - args.emplace_back(types->i32_type()); - } else { - ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); - args.emplace_back(arg_llvm_type); - } - } - llvm::Type* ret_llvm_type; - if (sig.ret_type()->id() == arrow::Type::STRING) { - // for string output, the last arg is the output length - args.emplace_back(types->i32_ptr_type()); - ret_llvm_type = types->i8_ptr_type(); - } else { - ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); - } - auto return_type = AsLLVMType(sig.ret_type(), types); + for (auto const& sig : func.signatures()) { + ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types)); + auto& [args, ret_llvm_type] = llvm_signature; engine->AddGlobalMappingForFunc(func.pc_name(), ret_llvm_type, args, func_ptr); } } diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 671ce91e870..9ff1de006be 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -118,7 +118,7 @@ TEST_F(TestLLVMGenerator, TestAdd) { TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) { auto external_registry = std::make_shared(); auto config_with_func_registry = - TestConfigurationWithFunctionRegistry(std::move(external_registry)); + TestConfigWithFunctionRegistry(std::move(external_registry)); std::unique_ptr generator; ASSERT_OK(LLVMGenerator::Make(config_with_func_registry, false, &generator)); diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index eb8b03f4315..86a1f0a9eba 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3594,7 +3594,7 @@ TEST_F(TestProjector, TestExtendedFunctions) { std::shared_ptr projector; auto external_registry = std::make_shared(); auto config_with_func_registry = - TestConfigurationWithFunctionRegistry(std::move(external_registry)); + TestConfigWithFunctionRegistry(std::move(external_registry)); ARROW_EXPECT_OK( Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); @@ -3618,7 +3618,7 @@ TEST_F(TestProjector, TestExtendedStubFunctions) { std::shared_ptr projector; auto external_registry = std::make_shared(); auto config_with_func_registry = - TestConfigurationWithExternalStubFunctionRegistry(std::move(external_registry)); + TestConfigWithStubFunction(std::move(external_registry)); ARROW_EXPECT_OK( Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); @@ -3646,7 +3646,7 @@ TEST_F(TestProjector, TestExtendedStubFunctionsWithFunctionHolder) { std::shared_ptr projector; auto external_registry = std::make_shared(); auto config_with_func_registry = - TestConfigurationWithFunctionHolderRegistry(std::move(external_registry)); + TestConfigWithHolderFunction(std::move(external_registry)); ARROW_EXPECT_OK( Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); @@ -3660,4 +3660,28 @@ TEST_F(TestProjector, TestExtendedStubFunctionsWithFunctionHolder) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } +TEST_F(TestProjector, TestExtendedStubFunctionThatNeedsContext) { + auto in_field = field("in", arrow::utf8()); + auto schema = arrow::schema({in_field}); + auto out_field = field("out", arrow::utf8()); + auto multiply = + TreeExprBuilder::MakeExpression("multiply_by_two_formula", {in_field}, out_field); + + std::shared_ptr projector; + auto external_registry = std::make_shared(); + auto config_with_func_registry = + TestConfigWithContextFunction(std::move(external_registry)); + ARROW_EXPECT_OK( + Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); + + int num_records = 4; + auto array = MakeArrowArrayUtf8({"1", "2", "3", "10"}, {true, true, true, true}); + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array}); + auto out = MakeArrowArrayUtf8({"1x2", "2x2", "3x2", "10x2"}, {true, true, true, true}); + + arrow::ArrayVector outs; + ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs)); + EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 59a0128a909..9a4c90a22c0 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -18,10 +18,12 @@ #include "gandiva/tests/test_util.h" #include +#include #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "gandiva/function_holder.h" +#include "gandiva/gdv_function_stubs.h" namespace gandiva { std::shared_ptr TestConfiguration() { @@ -62,12 +64,27 @@ static NativeFunction GetTestFunctionWithFunctionHolder() { return multiply_by_n_func; } -std::shared_ptr TestConfigurationWithFunctionRegistry( +static NativeFunction GetTestFunctionWithContext() { + NativeFunction multiply_by_two_formula( + "multiply_by_two_formula", {}, {arrow::utf8()}, arrow::utf8(), + ResultNullableType::kResultNullIfNull, "multiply_by_two_formula_utf8", + NativeFunction::kNeedsContext); + return multiply_by_two_formula; +} + +static std::shared_ptr BuildConfigurationWithRegistry( + std::shared_ptr registry, + const std::function)>& + register_func) { + ARROW_EXPECT_OK(register_func(registry)); + return ConfigurationBuilder().build(std::move(registry)); +} + +std::shared_ptr TestConfigWithFunctionRegistry( std::shared_ptr registry) { - ARROW_EXPECT_OK( - registry->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath())); - auto external_func_config = ConfigurationBuilder().build(std::move(registry)); - return external_func_config; + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath()); + }); } class MultiplyHolder : public FunctionHolder { @@ -111,26 +128,49 @@ static int64_t multiply_by_n(int64_t holder_ptr, int32_t value) { MultiplyHolder* holder = reinterpret_cast(holder_ptr); return value * (*holder)(); } + +// given a number string, return a string "{number}x2" +static const char* multiply_by_two_formula(int64_t ctx, const char* value, + int32_t value_len, int32_t* out_len) { + auto result = std::string(value, value_len) + "x2"; + *out_len = static_cast(result.length()); + auto out = reinterpret_cast(gdv_fn_context_arena_malloc(ctx, *out_len)); + if (out == nullptr) { + gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(out, result.c_str(), *out_len); + return out; +} +} + +std::shared_ptr TestConfigWithStubFunction( + std::shared_ptr registry) { + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register(GetTestExternalStubFunction(), + reinterpret_cast(multiply_by_three)); + }); } -std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( +std::shared_ptr TestConfigWithHolderFunction( std::shared_ptr registry) { - ARROW_EXPECT_OK(registry->Register(GetTestExternalStubFunction(), - reinterpret_cast(multiply_by_three))); - auto external_func_config = ConfigurationBuilder().build(std::move(registry)); - return external_func_config; + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register( + GetTestFunctionWithFunctionHolder(), reinterpret_cast(multiply_by_n), + [](const FunctionNode& node) -> arrow::Result { + std::shared_ptr derived_instance; + ARROW_RETURN_NOT_OK(MultiplyHolder::Make(node, &derived_instance)); + return derived_instance; + }); + }); } -std::shared_ptr TestConfigurationWithFunctionHolderRegistry( +std::shared_ptr TestConfigWithContextFunction( std::shared_ptr registry) { - ARROW_EXPECT_OK(registry->Register( - GetTestFunctionWithFunctionHolder(), reinterpret_cast(multiply_by_n), - [](const FunctionNode& node) -> arrow::Result { - std::shared_ptr derived_instance; - ARROW_RETURN_NOT_OK(MultiplyHolder::Make(node, &derived_instance)); - return derived_instance; - })); - auto external_func_config = ConfigurationBuilder().build(std::move(registry)); - return external_func_config; + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register(GetTestFunctionWithContext(), + reinterpret_cast(multiply_by_two_formula)); + }); } } // namespace gandiva diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index 5bc5bc3cdf2..3bd52fc91cb 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -98,13 +98,26 @@ static inline ArrayPtr MakeArrowTypeArray(const std::shared_ptr std::shared_ptr TestConfiguration(); -std::shared_ptr TestConfigurationWithFunctionRegistry( +// helper function to create a Configuration with an external function registered to the +// given function registry +std::shared_ptr TestConfigWithFunctionRegistry( std::shared_ptr registry); -std::shared_ptr TestConfigurationWithExternalStubFunctionRegistry( +// helper function to create a Configuration with an external stub function registered to +// the given function registry +std::shared_ptr TestConfigWithStubFunction( std::shared_ptr registry); -std::shared_ptr TestConfigurationWithFunctionHolderRegistry( +// helper function to create a Configuration with an external function registered +// to the given function registry, and the external function is a function with a function +// holder +std::shared_ptr TestConfigWithHolderFunction( + std::shared_ptr registry); + +// helper function to create a Configuration with an external function registered +// to the given function registry, and the external function is a function that needs +// context +std::shared_ptr TestConfigWithContextFunction( std::shared_ptr registry); std::string GetTestFunctionLLVMIRPath(); From 3871aac241c3994062d6225299bb679322992b14 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Wed, 8 Nov 2023 13:44:51 +0800 Subject: [PATCH 4/9] Rename public API to use c interface function to make this term consistent. --- cpp/src/gandiva/CMakeLists.txt | 2 +- cpp/src/gandiva/engine.cc | 2 +- cpp/src/gandiva/exported_funcs.h | 4 ++-- ...unctions.cc => external_c_interface_functions.cc} | 6 +++--- cpp/src/gandiva/function_registry.cc | 6 +++--- cpp/src/gandiva/function_registry.h | 12 ++++++------ cpp/src/gandiva/tests/projector_test.cc | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) rename cpp/src/gandiva/{external_stub_functions.cc => external_c_interface_functions.cc} (94%) diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 39a4e84431e..0290edd44c2 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -62,7 +62,7 @@ set(SRC_FILES expression_registry.cc exported_funcs_registry.cc exported_funcs.cc - external_stub_functions.cc + external_c_interface_functions.cc filter.cc function_ir_builder.cc function_holder_maker_registry.cc diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index b03513386ab..2e7b6493660 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -147,7 +147,7 @@ Engine::Engine(const std::shared_ptr& conf, Status Engine::Init() { std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); bool result = ExportedFuncsRegistry::Register( - std::make_shared(function_registry_)); + std::make_shared(function_registry_)); if (!result) { return Status::CodeGenError("Failed to register external stub functions"); } diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h index e6eb6df2c8f..6250c66bbeb 100644 --- a/cpp/src/gandiva/exported_funcs.h +++ b/cpp/src/gandiva/exported_funcs.h @@ -63,9 +63,9 @@ class ExportedHashFunctions : public ExportedFuncsBase { arrow::Status AddMappings(Engine* engine) const override; }; -class ExternalStubFunctions : public ExportedFuncsBase { +class ExternalCInterfaceFunctions : public ExportedFuncsBase { public: - ExternalStubFunctions(std::shared_ptr function_registry) + ExternalCInterfaceFunctions(std::shared_ptr function_registry) : function_registry_(std::move(function_registry)) {} arrow::Status AddMappings(Engine* engine) const override; diff --git a/cpp/src/gandiva/external_stub_functions.cc b/cpp/src/gandiva/external_c_interface_functions.cc similarity index 94% rename from cpp/src/gandiva/external_stub_functions.cc rename to cpp/src/gandiva/external_c_interface_functions.cc index 669b97669f8..c9f664a32e3 100644 --- a/cpp/src/gandiva/external_stub_functions.cc +++ b/cpp/src/gandiva/external_c_interface_functions.cc @@ -80,10 +80,10 @@ static arrow::Result, llvm::Type*>> MapToLLVM return std::make_pair(args, ret_llvm_type); } -arrow::Status ExternalStubFunctions::AddMappings(Engine* engine) const { - auto external_stub_funcs = function_registry_->GetStubFunctions(); +arrow::Status ExternalCInterfaceFunctions::AddMappings(Engine* engine) const { + auto const& c_interface_funcs = function_registry_->GetCInterfaceFunctions(); auto types = engine->types(); - for (auto& [func, func_ptr] : external_stub_funcs) { + for (auto& [func, func_ptr] : c_interface_funcs) { for (auto const& sig : func.signatures()) { ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types)); auto& [args, ret_llvm_type] = llvm_signature; diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 4f78f8e75ef..7fe260d57d8 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -118,7 +118,7 @@ arrow::Status FunctionRegistry::Register( ARROW_RETURN_NOT_OK(holder_maker_registry_.Register( func_base_name, std::move(function_holder_maker.value()))); } - stub_functions_.emplace_back(func, stub_function_ptr); + c_interface_functions_.emplace_back(func, stub_function_ptr); ARROW_RETURN_NOT_OK(FunctionRegistry::Add(std::move(func))); return Status::OK(); } @@ -128,9 +128,9 @@ const std::vector>& FunctionRegistry::GetBitcodeB return bitcode_memory_buffers_; } -const std::vector>& FunctionRegistry::GetStubFunctions() +const std::vector>& FunctionRegistry::GetCInterfaceFunctions() const { - return stub_functions_; + return c_interface_functions_; } const FunctionHolderMakerRegistry& FunctionRegistry::GetFunctionHolderMakerRegistry() diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index ec3661d7416..c1860188d17 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -58,21 +58,21 @@ class GANDIVA_EXPORT FunctionRegistry { arrow::Status Register(const std::vector& funcs, std::shared_ptr bitcode_buffer); - /// \brief register a stub function into the function registry + /// \brief register a C interface function into the function registry /// @param func the registered function's metadata - /// @param stub_function_ptr the function pointer to the + /// @param c_interface_function_ptr the function pointer to the /// registered function's implementation /// @param function_holder_maker this will be used as the function holder if the /// function requires a function holder arrow::Status Register( - NativeFunction func, void* stub_function_ptr, + NativeFunction func, void* c_interface_function_ptr, std::optional function_holder_maker = std::nullopt); /// \brief get a list of bitcode memory buffers saved in the registry const std::vector>& GetBitcodeBuffers() const; - /// \brief get a list of stub functions saved in the registry - const std::vector>& GetStubFunctions() const; + /// \brief get a list of C interface functions saved in the registry + const std::vector>& GetCInterfaceFunctions() const; const FunctionHolderMakerRegistry& GetFunctionHolderMakerRegistry() const; @@ -86,7 +86,7 @@ class GANDIVA_EXPORT FunctionRegistry { std::vector pc_registry_; SignatureMap pc_registry_map_; std::vector> bitcode_memory_buffers_; - std::vector> stub_functions_; + std::vector> c_interface_functions_; FunctionHolderMakerRegistry holder_maker_registry_; Status Add(NativeFunction func); diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 86a1f0a9eba..2a70a414052 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3608,7 +3608,7 @@ TEST_F(TestProjector, TestExtendedFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedStubFunctions) { +TEST_F(TestProjector, TestExtendedCInterfaceFunctions) { auto in_field = field("in", arrow::int32()); auto schema = arrow::schema({in_field}); auto out_field = field("out", arrow::int64()); @@ -3632,7 +3632,7 @@ TEST_F(TestProjector, TestExtendedStubFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedStubFunctionsWithFunctionHolder) { +TEST_F(TestProjector, TestExtendedCInterfaceFunctionsWithFunctionHolder) { auto multiple = TreeExprBuilder::MakeLiteral(5); auto in_field = field("in", arrow::int32()); auto schema = arrow::schema({in_field}); @@ -3660,7 +3660,7 @@ TEST_F(TestProjector, TestExtendedStubFunctionsWithFunctionHolder) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedStubFunctionThatNeedsContext) { +TEST_F(TestProjector, TestExtendedCInterfaceFunctionThatNeedsContext) { auto in_field = field("in", arrow::utf8()); auto schema = arrow::schema({in_field}); auto out_field = field("out", arrow::utf8()); From 93d8ac0f9965bcd7af11c77c49561a2c0da65023 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Wed, 8 Nov 2023 17:06:15 +0800 Subject: [PATCH 5/9] Add global mapping for external c interface functions and more tests for LLVM generator using external C interface functions. --- cpp/src/gandiva/engine.cc | 11 ++++---- cpp/src/gandiva/llvm_generator.h | 2 +- cpp/src/gandiva/llvm_generator_test.cc | 37 +++++++++++++++++++------ cpp/src/gandiva/tests/projector_test.cc | 2 +- cpp/src/gandiva/tests/test_util.cc | 12 ++++---- cpp/src/gandiva/tests/test_util.h | 2 +- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 2e7b6493660..a3868d754cb 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -146,11 +146,6 @@ Engine::Engine(const std::shared_ptr& conf, Status Engine::Init() { std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); - bool result = ExportedFuncsRegistry::Register( - std::make_shared(function_registry_)); - if (!result) { - return Status::CodeGenError("Failed to register external stub functions"); - } // Add mappings for global functions that can be accessed from LLVM/IR module. ARROW_RETURN_NOT_OK(AddGlobalMappings()); @@ -452,7 +447,11 @@ void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_ty execution_engine_->addGlobalMapping(fn, function_ptr); } -arrow::Status Engine::AddGlobalMappings() { return ExportedFuncsRegistry::AddMappings(this); } +arrow::Status Engine::AddGlobalMappings() { + ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this)); + ExternalCInterfaceFunctions c_interface_funcs(function_registry_); + return c_interface_funcs.AddMappings(this); +} std::string Engine::DumpIR() { std::string ir; diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 1921e256533..fae6ed48def 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -88,7 +88,7 @@ class GANDIVA_EXPORT LLVMGenerator { FRIEND_TEST(TestLLVMGenerator, VerifyPCFunctions); FRIEND_TEST(TestLLVMGenerator, TestAdd); FRIEND_TEST(TestLLVMGenerator, TestNullInternal); - FRIEND_TEST(TestLLVMGenerator, VerifyExtendedPCFunctions); + friend class TestLLVMGenerator; llvm::LLVMContext* context() { return engine_->context(); } llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); } diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 9ff1de006be..9cd2b945fe2 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -36,6 +36,24 @@ typedef int64_t (*add_vector_func_t)(int64_t* elements, int nelements); class TestLLVMGenerator : public ::testing::Test { protected: std::shared_ptr registry_ = default_function_registry(); + + public: + // create a Configuration with the given registry and verify that the given function + // exists in the module. + static void VerifyFunctionMapping( + const std::string& function_name, + const std::function( + std::shared_ptr)>& config_factory) { + auto external_registry = std::make_shared(); + auto config = config_factory(std::move(external_registry)); + + std::unique_ptr generator; + ASSERT_OK(LLVMGenerator::Make(config, false, &generator)); + + auto module = generator->module(); + ASSERT_OK(generator->engine_->LoadFunctionIRs()); + EXPECT_NE(module->getFunction(function_name), nullptr); + } }; // Verify that a valid pc function exists for every function in the registry. @@ -116,16 +134,19 @@ TEST_F(TestLLVMGenerator, TestAdd) { } TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) { - auto external_registry = std::make_shared(); - auto config_with_func_registry = - TestConfigWithFunctionRegistry(std::move(external_registry)); + VerifyFunctionMapping("multiply_by_two_int32", [](auto registry) { + return TestConfigWithFunctionRegistry(std::move(registry)); + }); +} - std::unique_ptr generator; - ASSERT_OK(LLVMGenerator::Make(config_with_func_registry, false, &generator)); +TEST_F(TestLLVMGenerator, VerifyExtendedCInterfaceFunctions) { + VerifyFunctionMapping("multiply_by_three_int32", [](auto registry) { + return TestConfigWithCInterfaceFunction(std::move(registry)); + }); - auto module = generator->module(); - ASSERT_OK(generator->engine_->LoadFunctionIRs()); - EXPECT_NE(module->getFunction("multiply_by_two_int32"), nullptr); + VerifyFunctionMapping("multiply_by_n_int32_int32", [](auto registry) { + return TestConfigWithHolderFunction(std::move(registry)); + }); } } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 2a70a414052..16f7f994d58 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3618,7 +3618,7 @@ TEST_F(TestProjector, TestExtendedCInterfaceFunctions) { std::shared_ptr projector; auto external_registry = std::make_shared(); auto config_with_func_registry = - TestConfigWithStubFunction(std::move(external_registry)); + TestConfigWithCInterfaceFunction(std::move(external_registry)); ARROW_EXPECT_OK( Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 9a4c90a22c0..e471d51e1f9 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -48,7 +48,7 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } -static NativeFunction GetTestExternalStubFunction() { +static NativeFunction GetTestExternalCInterfaceFunction() { NativeFunction multiply_by_three_func( "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); @@ -89,7 +89,7 @@ std::shared_ptr TestConfigWithFunctionRegistry( class MultiplyHolder : public FunctionHolder { public: - MultiplyHolder(int32_t num) : num_(num){}; + explicit MultiplyHolder(int32_t num) : num_(num){}; static Status Make(const FunctionNode& node, std::shared_ptr* holder) { ARROW_RETURN_IF(node.children().size() != 2, @@ -112,7 +112,7 @@ class MultiplyHolder : public FunctionHolder { return Status::OK(); } - int32_t operator()() { return num_; } + int32_t operator()() const { return num_; } private: int32_t num_; @@ -125,7 +125,7 @@ static int64_t multiply_by_three(int32_t value) { return value * 3; } // this function requires a function holder static int64_t multiply_by_n(int64_t holder_ptr, int32_t value) { - MultiplyHolder* holder = reinterpret_cast(holder_ptr); + auto* holder = reinterpret_cast(holder_ptr); return value * (*holder)(); } @@ -145,10 +145,10 @@ static const char* multiply_by_two_formula(int64_t ctx, const char* value, } } -std::shared_ptr TestConfigWithStubFunction( +std::shared_ptr TestConfigWithCInterfaceFunction( std::shared_ptr registry) { return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { - return reg->Register(GetTestExternalStubFunction(), + return reg->Register(GetTestExternalCInterfaceFunction(), reinterpret_cast(multiply_by_three)); }); } diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index 3bd52fc91cb..a5401d93895 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -105,7 +105,7 @@ std::shared_ptr TestConfigWithFunctionRegistry( // helper function to create a Configuration with an external stub function registered to // the given function registry -std::shared_ptr TestConfigWithStubFunction( +std::shared_ptr TestConfigWithCInterfaceFunction( std::shared_ptr registry); // helper function to create a Configuration with an external function registered From 217e9168b06919e3deee9ccc26e519a0bc278c3e Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Wed, 8 Nov 2023 20:41:20 +0800 Subject: [PATCH 6/9] Export gandiva context related function calls so that they can be used with correct visibility on Windows. --- cpp/src/gandiva/exported_funcs.h | 3 ++- cpp/src/gandiva/function_registry.cc | 4 ++-- cpp/src/gandiva/gdv_function_stubs.h | 2 ++ cpp/src/gandiva/tests/test_util.cc | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h index 6250c66bbeb..7bd8c762d84 100644 --- a/cpp/src/gandiva/exported_funcs.h +++ b/cpp/src/gandiva/exported_funcs.h @@ -65,7 +65,8 @@ class ExportedHashFunctions : public ExportedFuncsBase { class ExternalCInterfaceFunctions : public ExportedFuncsBase { public: - ExternalCInterfaceFunctions(std::shared_ptr function_registry) + explicit ExternalCInterfaceFunctions( + std::shared_ptr function_registry) : function_registry_(std::move(function_registry)) {} arrow::Status AddMappings(Engine* engine) const override; diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 7fe260d57d8..939627c9c25 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -128,8 +128,8 @@ const std::vector>& FunctionRegistry::GetBitcodeB return bitcode_memory_buffers_; } -const std::vector>& FunctionRegistry::GetCInterfaceFunctions() - const { +const std::vector>& +FunctionRegistry::GetCInterfaceFunctions() const { return c_interface_functions_; } diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 5356a91f3ce..3f52537ee05 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -74,8 +74,10 @@ int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* bool in2_validity, int32_t suppress_errors, bool in3_validity, bool* out_valid); +GANDIVA_EXPORT void gdv_fn_context_set_error_msg(int64_t context_ptr, const char* err_msg); +GANDIVA_EXPORT uint8_t* gdv_fn_context_arena_malloc(int64_t context_ptr, int32_t data_len); void gdv_fn_context_arena_reset(int64_t context_ptr); diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index e471d51e1f9..76e3dde6548 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -89,7 +89,7 @@ std::shared_ptr TestConfigWithFunctionRegistry( class MultiplyHolder : public FunctionHolder { public: - explicit MultiplyHolder(int32_t num) : num_(num){}; + explicit MultiplyHolder(int32_t num) : num_(num) {} static Status Make(const FunctionNode& node, std::shared_ptr* holder) { ARROW_RETURN_IF(node.children().size() != 2, From 427556d961aeb754e9359785d6a0088f8f677cd5 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Thu, 9 Nov 2023 19:39:33 +0800 Subject: [PATCH 7/9] Rename C interface function to C function for simplicity. --- cpp/src/gandiva/CMakeLists.txt | 2 +- cpp/src/gandiva/engine.cc | 4 +-- cpp/src/gandiva/exported_funcs.h | 5 ++-- ...e_functions.cc => external_c_functions.cc} | 25 +++++++++---------- .../gandiva/function_holder_maker_registry.cc | 13 ++++------ cpp/src/gandiva/function_registry.cc | 10 ++++---- cpp/src/gandiva/function_registry.h | 12 ++++----- cpp/src/gandiva/gdv_string_function_stubs.cc | 2 -- cpp/src/gandiva/llvm_generator_test.cc | 4 +-- cpp/src/gandiva/tests/projector_test.cc | 9 +++---- cpp/src/gandiva/tests/test_util.cc | 19 ++++++-------- cpp/src/gandiva/tests/test_util.h | 4 +-- 12 files changed, 48 insertions(+), 61 deletions(-) rename cpp/src/gandiva/{external_c_interface_functions.cc => external_c_functions.cc} (81%) diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 0290edd44c2..abbf0ede031 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -62,7 +62,7 @@ set(SRC_FILES expression_registry.cc exported_funcs_registry.cc exported_funcs.cc - external_c_interface_functions.cc + external_c_functions.cc filter.cc function_ir_builder.cc function_holder_maker_registry.cc diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index a3868d754cb..1cea1fd2cbf 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -449,8 +449,8 @@ void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_ty arrow::Status Engine::AddGlobalMappings() { ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this)); - ExternalCInterfaceFunctions c_interface_funcs(function_registry_); - return c_interface_funcs.AddMappings(this); + ExternalCFunctions c_funcs(function_registry_); + return c_funcs.AddMappings(this); } std::string Engine::DumpIR() { diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h index 7bd8c762d84..414ec5c5bfd 100644 --- a/cpp/src/gandiva/exported_funcs.h +++ b/cpp/src/gandiva/exported_funcs.h @@ -63,10 +63,9 @@ class ExportedHashFunctions : public ExportedFuncsBase { arrow::Status AddMappings(Engine* engine) const override; }; -class ExternalCInterfaceFunctions : public ExportedFuncsBase { +class ExternalCFunctions : public ExportedFuncsBase { public: - explicit ExternalCInterfaceFunctions( - std::shared_ptr function_registry) + explicit ExternalCFunctions(std::shared_ptr function_registry) : function_registry_(std::move(function_registry)) {} arrow::Status AddMappings(Engine* engine) const override; diff --git a/cpp/src/gandiva/external_c_interface_functions.cc b/cpp/src/gandiva/external_c_functions.cc similarity index 81% rename from cpp/src/gandiva/external_c_interface_functions.cc rename to cpp/src/gandiva/external_c_functions.cc index c9f664a32e3..d10bc927503 100644 --- a/cpp/src/gandiva/external_c_interface_functions.cc +++ b/cpp/src/gandiva/external_c_functions.cc @@ -51,39 +51,38 @@ static arrow::Result AsLLVMType(const DataTypePtr& from_type, // map from a NativeFunction's signature to the corresponding LLVM signature static arrow::Result, llvm::Type*>> MapToLLVMSignature( const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { - std::vector args; - args.reserve(sig.param_types().size()); + std::vector arg_llvm_types; + arg_llvm_types.reserve(sig.param_types().size()); if (func.NeedsContext()) { - args.emplace_back(types->i64_type()); + arg_llvm_types.emplace_back(types->i64_type()); } if (func.NeedsFunctionHolder()) { - args.emplace_back(types->i64_type()); + arg_llvm_types.emplace_back(types->i64_type()); } for (auto const& arg : sig.param_types()) { if (arg->id() == arrow::Type::STRING) { - args.emplace_back(types->i8_ptr_type()); - args.emplace_back(types->i32_type()); + arg_llvm_types.emplace_back(types->i8_ptr_type()); + arg_llvm_types.emplace_back(types->i32_type()); } else { ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); - args.emplace_back(arg_llvm_type); + arg_llvm_types.emplace_back(arg_llvm_type); } } llvm::Type* ret_llvm_type; if (sig.ret_type()->id() == arrow::Type::STRING) { // for string output, the last arg is the output length - args.emplace_back(types->i32_ptr_type()); + arg_llvm_types.emplace_back(types->i32_ptr_type()); ret_llvm_type = types->i8_ptr_type(); } else { ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); } - auto return_type = AsLLVMType(sig.ret_type(), types); - return std::make_pair(args, ret_llvm_type); + return std::make_pair(std::move(arg_llvm_types), ret_llvm_type); } -arrow::Status ExternalCInterfaceFunctions::AddMappings(Engine* engine) const { - auto const& c_interface_funcs = function_registry_->GetCInterfaceFunctions(); +arrow::Status ExternalCFunctions::AddMappings(Engine* engine) const { + auto const& c_funcs = function_registry_->GetCFunctions(); auto types = engine->types(); - for (auto& [func, func_ptr] : c_interface_funcs) { + for (auto& [func, func_ptr] : c_funcs) { for (auto const& sig : func.signatures()) { ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types)); auto& [args, ret_llvm_type] = llvm_signature; diff --git a/cpp/src/gandiva/function_holder_maker_registry.cc b/cpp/src/gandiva/function_holder_maker_registry.cc index 169147697d7..bb93402475a 100644 --- a/cpp/src/gandiva/function_holder_maker_registry.cc +++ b/cpp/src/gandiva/function_holder_maker_registry.cc @@ -19,6 +19,7 @@ #include +#include "arrow/util/string.h" #include "gandiva/function_holder.h" #include "gandiva/interval_holder.h" #include "gandiva/random_generator_holder.h" @@ -27,18 +28,14 @@ namespace gandiva { +using arrow::internal::AsciiToLower; + FunctionHolderMakerRegistry::FunctionHolderMakerRegistry() : function_holder_makers_(DefaultHolderMakers()) {} -static std::string to_lower(const std::string& str) { - std::string data = str; - std::transform(data.begin(), data.end(), data.begin(), - [](unsigned char c) { return std::tolower(c); }); - return data; -} arrow::Status FunctionHolderMakerRegistry::Register(const std::string& name, FunctionHolderMaker holder_maker) { - function_holder_makers_.emplace(to_lower(name), std::move(holder_maker)); + function_holder_makers_.emplace(AsciiToLower(name), std::move(holder_maker)); return arrow::Status::OK(); } @@ -51,7 +48,7 @@ static arrow::Result HolderMaker(const FunctionNode& node) { arrow::Result FunctionHolderMakerRegistry::Make( const std::string& name, const FunctionNode& node) { - auto lowered_name = to_lower(name); + auto lowered_name = AsciiToLower(name); auto found = function_holder_makers_.find(lowered_name); if (found == function_holder_makers_.end()) { return Status::Invalid("function holder not registered for function " + name); diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 939627c9c25..714e116e238 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -110,7 +110,7 @@ arrow::Status FunctionRegistry::Register(const std::vector& func } arrow::Status FunctionRegistry::Register( - NativeFunction func, void* stub_function_ptr, + NativeFunction func, void* c_function_ptr, std::optional function_holder_maker) { if (function_holder_maker.has_value()) { // all signatures should have the same base name, use the first signature's base name @@ -118,7 +118,7 @@ arrow::Status FunctionRegistry::Register( ARROW_RETURN_NOT_OK(holder_maker_registry_.Register( func_base_name, std::move(function_holder_maker.value()))); } - c_interface_functions_.emplace_back(func, stub_function_ptr); + c_functions_.emplace_back(func, c_function_ptr); ARROW_RETURN_NOT_OK(FunctionRegistry::Add(std::move(func))); return Status::OK(); } @@ -128,9 +128,9 @@ const std::vector>& FunctionRegistry::GetBitcodeB return bitcode_memory_buffers_; } -const std::vector>& -FunctionRegistry::GetCInterfaceFunctions() const { - return c_interface_functions_; +const std::vector>& FunctionRegistry::GetCFunctions() + const { + return c_functions_; } const FunctionHolderMakerRegistry& FunctionRegistry::GetFunctionHolderMakerRegistry() diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index c1860188d17..24b64fac5f3 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -58,21 +58,21 @@ class GANDIVA_EXPORT FunctionRegistry { arrow::Status Register(const std::vector& funcs, std::shared_ptr bitcode_buffer); - /// \brief register a C interface function into the function registry + /// \brief register a C function into the function registry /// @param func the registered function's metadata - /// @param c_interface_function_ptr the function pointer to the + /// @param c_function_ptr the function pointer to the /// registered function's implementation /// @param function_holder_maker this will be used as the function holder if the /// function requires a function holder arrow::Status Register( - NativeFunction func, void* c_interface_function_ptr, + NativeFunction func, void* c_function_ptr, std::optional function_holder_maker = std::nullopt); /// \brief get a list of bitcode memory buffers saved in the registry const std::vector>& GetBitcodeBuffers() const; - /// \brief get a list of C interface functions saved in the registry - const std::vector>& GetCInterfaceFunctions() const; + /// \brief get a list of C functions saved in the registry + const std::vector>& GetCFunctions() const; const FunctionHolderMakerRegistry& GetFunctionHolderMakerRegistry() const; @@ -86,7 +86,7 @@ class GANDIVA_EXPORT FunctionRegistry { std::vector pc_registry_; SignatureMap pc_registry_map_; std::vector> bitcode_memory_buffers_; - std::vector> c_interface_functions_; + std::vector> c_functions_; FunctionHolderMakerRegistry holder_maker_registry_; Status Add(NativeFunction func); diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc index 51b1f3b8007..9f5b5ce64b4 100644 --- a/cpp/src/gandiva/gdv_string_function_stubs.cc +++ b/cpp/src/gandiva/gdv_string_function_stubs.cc @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -// #pragma once - #include "gandiva/gdv_function_stubs.h" #include diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 9cd2b945fe2..853d8ae6c3b 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -139,9 +139,9 @@ TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) { }); } -TEST_F(TestLLVMGenerator, VerifyExtendedCInterfaceFunctions) { +TEST_F(TestLLVMGenerator, VerifyExtendedCFunctions) { VerifyFunctionMapping("multiply_by_three_int32", [](auto registry) { - return TestConfigWithCInterfaceFunction(std::move(registry)); + return TestConfigWithCFunction(std::move(registry)); }); VerifyFunctionMapping("multiply_by_n_int32_int32", [](auto registry) { diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 16f7f994d58..59eeb3d92f1 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -3608,7 +3608,7 @@ TEST_F(TestProjector, TestExtendedFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedCInterfaceFunctions) { +TEST_F(TestProjector, TestExtendedCFunctions) { auto in_field = field("in", arrow::int32()); auto schema = arrow::schema({in_field}); auto out_field = field("out", arrow::int64()); @@ -3617,8 +3617,7 @@ TEST_F(TestProjector, TestExtendedCInterfaceFunctions) { std::shared_ptr projector; auto external_registry = std::make_shared(); - auto config_with_func_registry = - TestConfigWithCInterfaceFunction(std::move(external_registry)); + auto config_with_func_registry = TestConfigWithCFunction(std::move(external_registry)); ARROW_EXPECT_OK( Projector::Make(schema, {multiply}, config_with_func_registry, &projector)); @@ -3632,7 +3631,7 @@ TEST_F(TestProjector, TestExtendedCInterfaceFunctions) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedCInterfaceFunctionsWithFunctionHolder) { +TEST_F(TestProjector, TestExtendedCFunctionsWithFunctionHolder) { auto multiple = TreeExprBuilder::MakeLiteral(5); auto in_field = field("in", arrow::int32()); auto schema = arrow::schema({in_field}); @@ -3660,7 +3659,7 @@ TEST_F(TestProjector, TestExtendedCInterfaceFunctionsWithFunctionHolder) { EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0)); } -TEST_F(TestProjector, TestExtendedCInterfaceFunctionThatNeedsContext) { +TEST_F(TestProjector, TestExtendedCFunctionThatNeedsContext) { auto in_field = field("in", arrow::utf8()); auto schema = arrow::schema({in_field}); auto out_field = field("out", arrow::utf8()); diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 76e3dde6548..959ea3cd7a4 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -48,7 +48,7 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } -static NativeFunction GetTestExternalCInterfaceFunction() { +static NativeFunction GetTestExternalCFunction() { NativeFunction multiply_by_three_func( "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); @@ -91,7 +91,7 @@ class MultiplyHolder : public FunctionHolder { public: explicit MultiplyHolder(int32_t num) : num_(num) {} - static Status Make(const FunctionNode& node, std::shared_ptr* holder) { + static arrow::Result> Make(const FunctionNode& node) { ARROW_RETURN_IF(node.children().size() != 2, Status::Invalid("'multiply_by_n' function requires two parameters")); @@ -107,9 +107,8 @@ class MultiplyHolder : public FunctionHolder { Status::Invalid( "'multiply_by_n' function requires an int32 literal as the 2nd parameter")); - *holder = std::make_shared( + return std::make_shared( literal->is_null() ? 0 : std::get(literal->holder())); - return Status::OK(); } int32_t operator()() const { return num_; } @@ -119,7 +118,7 @@ class MultiplyHolder : public FunctionHolder { }; extern "C" { -// this function is used as an external stub function for testing so it has to be declared +// this function is used as an external C function for testing so it has to be declared // with extern C static int64_t multiply_by_three(int32_t value) { return value * 3; } @@ -145,10 +144,10 @@ static const char* multiply_by_two_formula(int64_t ctx, const char* value, } } -std::shared_ptr TestConfigWithCInterfaceFunction( +std::shared_ptr TestConfigWithCFunction( std::shared_ptr registry) { return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { - return reg->Register(GetTestExternalCInterfaceFunction(), + return reg->Register(GetTestExternalCFunction(), reinterpret_cast(multiply_by_three)); }); } @@ -158,11 +157,7 @@ std::shared_ptr TestConfigWithHolderFunction( return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { return reg->Register( GetTestFunctionWithFunctionHolder(), reinterpret_cast(multiply_by_n), - [](const FunctionNode& node) -> arrow::Result { - std::shared_ptr derived_instance; - ARROW_RETURN_NOT_OK(MultiplyHolder::Make(node, &derived_instance)); - return derived_instance; - }); + [](const FunctionNode& node) { return MultiplyHolder::Make(node); }); }); } diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index a5401d93895..69d63732aee 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -103,9 +103,9 @@ std::shared_ptr TestConfiguration(); std::shared_ptr TestConfigWithFunctionRegistry( std::shared_ptr registry); -// helper function to create a Configuration with an external stub function registered to +// helper function to create a Configuration with an external C function registered to // the given function registry -std::shared_ptr TestConfigWithCInterfaceFunction( +std::shared_ptr TestConfigWithCFunction( std::shared_ptr registry); // helper function to create a Configuration with an external function registered From e9e248b43a9f88eeac56470efd5974241905f4ad Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Fri, 10 Nov 2023 17:26:56 +0800 Subject: [PATCH 8/9] Refactor the code to use the IRtype API in engine class, and calculate the correct number of arguments up front. --- cpp/src/gandiva/external_c_functions.cc | 61 +++++++++---------------- cpp/src/gandiva/function_registry.cc | 7 ++- 2 files changed, 24 insertions(+), 44 deletions(-) diff --git a/cpp/src/gandiva/external_c_functions.cc b/cpp/src/gandiva/external_c_functions.cc index d10bc927503..5560f2503c5 100644 --- a/cpp/src/gandiva/external_c_functions.cc +++ b/cpp/src/gandiva/external_c_functions.cc @@ -15,73 +15,54 @@ // specific language governing permissions and limitations // under the License -#include "llvm/IR/Type.h" +#include #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" namespace gandiva { -static arrow::Result AsLLVMType(const DataTypePtr& from_type, - LLVMTypes* types) { - switch (from_type->id()) { - case arrow::Type::BOOL: - return types->i1_type(); - case arrow::Type::INT8: - case arrow::Type::UINT8: - return types->i8_type(); - case arrow::Type::INT16: - case arrow::Type::UINT16: - return types->i16_type(); - case arrow::Type::INT32: - case arrow::Type::UINT32: - return types->i32_type(); - case arrow::Type::INT64: - case arrow::Type::UINT64: - return types->i64_type(); - case arrow::Type::FLOAT: - return types->float_type(); - case arrow::Type::DOUBLE: - return types->double_type(); - default: - return Status::NotImplemented("Unsupported arrow data type: " + - from_type->ToString()); +// calculate the number of arguments for a function signature +static size_t GetNumArgs(const FunctionSignature& sig, const NativeFunction& func) { + auto num_args = 0; + num_args += func.NeedsContext() ? 1 : 0; + num_args += func.NeedsFunctionHolder() ? 1 : 0; + for (auto const& arg : sig.param_types()) { + num_args += arg->id() == arrow::Type::STRING ? 2 : 1; } + num_args += sig.ret_type()->id() == arrow::Type::STRING ? 1 : 0; + return num_args; } // map from a NativeFunction's signature to the corresponding LLVM signature -static arrow::Result, llvm::Type*>> MapToLLVMSignature( +static Result, llvm::Type*>> MapToLLVMSignature( const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { std::vector arg_llvm_types; - arg_llvm_types.reserve(sig.param_types().size()); + arg_llvm_types.reserve(GetNumArgs(sig, func)); + if (func.NeedsContext()) { - arg_llvm_types.emplace_back(types->i64_type()); + arg_llvm_types.push_back(types->i64_type()); } if (func.NeedsFunctionHolder()) { - arg_llvm_types.emplace_back(types->i64_type()); + arg_llvm_types.push_back(types->i64_type()); } for (auto const& arg : sig.param_types()) { + arg_llvm_types.push_back(types->IRType(arg->id())); if (arg->id() == arrow::Type::STRING) { - arg_llvm_types.emplace_back(types->i8_ptr_type()); - arg_llvm_types.emplace_back(types->i32_type()); - } else { - ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); - arg_llvm_types.emplace_back(arg_llvm_type); + // string type needs an additional length argument + arg_llvm_types.push_back(types->i32_type()); } } - llvm::Type* ret_llvm_type; if (sig.ret_type()->id() == arrow::Type::STRING) { // for string output, the last arg is the output length - arg_llvm_types.emplace_back(types->i32_ptr_type()); - ret_llvm_type = types->i8_ptr_type(); - } else { - ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); + arg_llvm_types.push_back(types->i32_ptr_type()); } + auto ret_llvm_type = types->IRType(sig.ret_type()->id()); return std::make_pair(std::move(arg_llvm_types), ret_llvm_type); } arrow::Status ExternalCFunctions::AddMappings(Engine* engine) const { auto const& c_funcs = function_registry_->GetCFunctions(); - auto types = engine->types(); + auto const types = engine->types(); for (auto& [func, func_ptr] : c_funcs) { for (auto const& sig : func.signatures()) { ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types)); diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 714e116e238..2e392630ee0 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -64,7 +64,7 @@ FunctionRegistry::iterator FunctionRegistry::back() const { const NativeFunction* FunctionRegistry::LookupSignature( const FunctionSignature& signature) const { - auto got = pc_registry_map_.find(&signature); + auto const got = pc_registry_map_.find(&signature); return got == pc_registry_map_.end() ? nullptr : got->second; } @@ -116,11 +116,10 @@ arrow::Status FunctionRegistry::Register( // all signatures should have the same base name, use the first signature's base name auto const& func_base_name = func.signatures().begin()->base_name(); ARROW_RETURN_NOT_OK(holder_maker_registry_.Register( - func_base_name, std::move(function_holder_maker.value()))); + func_base_name, std::move(function_holder_maker).value())); } c_functions_.emplace_back(func, c_function_ptr); - ARROW_RETURN_NOT_OK(FunctionRegistry::Add(std::move(func))); - return Status::OK(); + return FunctionRegistry::Add(std::move(func)); } const std::vector>& FunctionRegistry::GetBitcodeBuffers() From ec24d234f13de780faed825c8a08b02cb2f4ca43 Mon Sep 17 00:00:00 2001 From: Yue Ni Date: Mon, 13 Nov 2023 09:53:52 +0800 Subject: [PATCH 9/9] Change several functions in external_c_functions from static functions into anonymous namespace. --- cpp/src/gandiva/CMakeLists.txt | 2 +- cpp/src/gandiva/external_c_functions.cc | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index abbf0ede031..3f038f54a7b 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -64,8 +64,8 @@ set(SRC_FILES exported_funcs.cc external_c_functions.cc filter.cc - function_ir_builder.cc function_holder_maker_registry.cc + function_ir_builder.cc function_registry.cc function_registry_arithmetic.cc function_registry_datetime.cc diff --git a/cpp/src/gandiva/external_c_functions.cc b/cpp/src/gandiva/external_c_functions.cc index 5560f2503c5..fcba00aed35 100644 --- a/cpp/src/gandiva/external_c_functions.cc +++ b/cpp/src/gandiva/external_c_functions.cc @@ -20,9 +20,10 @@ #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" -namespace gandiva { +namespace { // calculate the number of arguments for a function signature -static size_t GetNumArgs(const FunctionSignature& sig, const NativeFunction& func) { +size_t GetNumArgs(const gandiva::FunctionSignature& sig, + const gandiva::NativeFunction& func) { auto num_args = 0; num_args += func.NeedsContext() ? 1 : 0; num_args += func.NeedsFunctionHolder() ? 1 : 0; @@ -34,8 +35,9 @@ static size_t GetNumArgs(const FunctionSignature& sig, const NativeFunction& fun } // map from a NativeFunction's signature to the corresponding LLVM signature -static Result, llvm::Type*>> MapToLLVMSignature( - const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { +arrow::Result, llvm::Type*>> MapToLLVMSignature( + const gandiva::FunctionSignature& sig, const gandiva::NativeFunction& func, + gandiva::LLVMTypes* types) { std::vector arg_llvm_types; arg_llvm_types.reserve(GetNumArgs(sig, func)); @@ -59,8 +61,10 @@ static Result, llvm::Type*>> MapToLLVMSignatu auto ret_llvm_type = types->IRType(sig.ret_type()->id()); return std::make_pair(std::move(arg_llvm_types), ret_llvm_type); } +} // namespace -arrow::Status ExternalCFunctions::AddMappings(Engine* engine) const { +namespace gandiva { +Status ExternalCFunctions::AddMappings(Engine* engine) const { auto const& c_funcs = function_registry_->GetCFunctions(); auto const types = engine->types(); for (auto& [func, func_ptr] : c_funcs) { @@ -70,6 +74,6 @@ arrow::Status ExternalCFunctions::AddMappings(Engine* engine) const { engine->AddGlobalMappingForFunc(func.pc_name(), ret_llvm_type, args, func_ptr); } } - return arrow::Status::OK(); + return Status::OK(); } } // namespace gandiva