diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index d773fb5ff58..633bf07294c 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -36,6 +36,7 @@ provide_cmake_module(GandivaAddBitcode "Gandiva") # Set the path where the bitcode file generated, see precompiled/CMakeLists.txt set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc") +set(GANDIVA_PRECOMPILED_MANDATORY_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/mandatory_ir.bc") set(GANDIVA_PRECOMPILED_CC_PATH "${CMAKE_CURRENT_BINARY_DIR}/precompiled_bitcode.cc") set(GANDIVA_PRECOMPILED_CC_IN_PATH "${CMAKE_CURRENT_SOURCE_DIR}/precompiled_bitcode.cc.in") diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index bfce72cefc6..f9f588f14e7 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -115,6 +115,9 @@ namespace gandiva { extern const unsigned char kPrecompiledBitcode[]; extern const size_t kPrecompiledBitcodeSize; +extern const unsigned char kPrecompiledMandatoryBitcode[]; +extern const size_t kPrecompiledMandatoryBitcodeSize; + std::once_flag llvm_init_once_flag; static bool llvm_init = false; static llvm::StringRef cpu_name; @@ -132,8 +135,8 @@ arrow::Result AsArrowResult(llvm::Expected& expected, Result MakeTargetMachineBuilder( const Configuration& conf) { - llvm::orc::JITTargetMachineBuilder jtmb( - (llvm::Triple(llvm::sys::getDefaultTargetTriple()))); + static auto default_target_triple = llvm::sys::getDefaultTargetTriple(); + llvm::orc::JITTargetMachineBuilder jtmb((llvm::Triple(default_target_triple))); if (conf.target_host_cpu()) { jtmb.setCPU(cpu_name.str()); jtmb.addFeatures(cpu_attrs); @@ -156,10 +159,9 @@ std::string DumpModuleIR(const llvm::Module& module) { return ir; } -void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name, +void AddAbsoluteSymbol(llvm::orc::JITDylib& jit_dylib, + llvm::orc::MangleAndInterner& mangle, const std::string& name, void* function_ptr) { - llvm::orc::MangleAndInterner mangle(lljit.getExecutionSession(), lljit.getDataLayout()); - // https://github.com/llvm/llvm-project/commit/8b1771bd9f304be39d4dcbdcccedb6d3bcd18200#diff-77984a824d9182e5c67a481740f3bc5da78d5bd4cf6e1716a083ddb30a4a4931 // LLVM 17 introduced ExecutorSymbolDef and move most of ORC APIs to ExecutorAddr #if LLVM_VERSION_MAJOR >= 17 @@ -171,21 +173,21 @@ void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name, llvm::JITSymbolFlags::Exported); #endif - auto error = lljit.getMainJITDylib().define( - llvm::orc::absoluteSymbols({{mangle(name), symbol}})); + auto error = jit_dylib.define(llvm::orc::absoluteSymbols({{mangle(name), symbol}})); llvm::cantFail(std::move(error)); } // add current process symbol to dylib // LLVM >= 18 does this automatically -void AddProcessSymbol(llvm::orc::LLJIT& lljit) { +void AddProcessSymbol(llvm::orc::LLJIT& lljit, llvm::orc::MangleAndInterner& mangle) { lljit.getMainJITDylib().addGenerator( llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( lljit.getDataLayout().getGlobalPrefix()))); // the `atexit` symbol cannot be found for ASAN #ifdef ADDRESS_SANITIZER if (!lljit.lookup("atexit")) { - AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast(atexit)); + AddAbsoluteSymbol(lljit.getMainJITDylib(), mangle, "atexit", + reinterpret_cast(atexit)); } #endif } @@ -211,7 +213,8 @@ Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) { Result> BuildJIT( llvm::orc::JITTargetMachineBuilder jtmb, - std::optional>& object_cache) { + std::unique_ptr target_machine, + llvm::ObjectCache* object_cache) { llvm::orc::LLJITBuilder jit_builder; #ifdef JIT_LINK_SUPPORTED @@ -219,30 +222,21 @@ Result> BuildJIT( #endif jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); - if (object_cache.has_value()) { - jit_builder.setCompileFunctionCreator( - [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB) - -> llvm::Expected> { - auto target_machine = JTMB.createTargetMachine(); - if (!target_machine) { - return target_machine.takeError(); - } - // after compilation, the object code will be stored into the given object - // cache - return std::make_unique( - std::move(*target_machine), &object_cache.value().get()); - }); - } + jit_builder.setCompileFunctionCreator( + [&object_cache, &target_machine](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected> { + // after compilation, the object code will be stored into the given object + // cache + return std::make_unique( + std::move(target_machine), object_cache); + }); auto maybe_jit = jit_builder.create(); ARROW_ASSIGN_OR_RAISE(auto jit, AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); - - AddProcessSymbol(*jit); return jit; } -Status Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) { - auto cached_buffer = object_cache.getObject(nullptr); +Status Engine::SetCachedObjectCode(std::unique_ptr cached_buffer) { if (cached_buffer) { auto error = lljit_->addObjectFile(std::move(cached_buffer)); if (error) { @@ -280,7 +274,7 @@ void Engine::InitOnce() { Engine::Engine(const std::shared_ptr& conf, std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached) + llvm::TargetIRAnalysis target_ir_analysis, bool cached) : context_(std::make_unique()), lljit_(std::move(lljit)), ir_builder_(std::make_unique>(*context_)), @@ -288,8 +282,11 @@ Engine::Engine(const std::shared_ptr& conf, optimize_(conf->optimize()), cached_(cached), function_registry_(conf->function_registry()), - target_machine_(std::move(target_machine)), + target_ir_analysis_(std::move(target_ir_analysis)), conf_(conf) { + mangle_ = std::make_unique(lljit_->getExecutionSession(), + lljit_->getDataLayout()); + AddProcessSymbol(*lljit_, *mangle_); // LLVM 10 doesn't like the expr function name to be the same as the module name auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); module_ = std::make_unique(module_id, *context_); @@ -297,7 +294,8 @@ Engine::Engine(const std::shared_ptr& conf, Engine::~Engine() {} -Status Engine::Init() { +Status Engine::Init(std::unordered_set function_names) { + used_functions_ = std::move(function_names); std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); // Add mappings for global functions that can be accessed from LLVM/IR module. @@ -306,7 +304,13 @@ Status Engine::Init() { } Status Engine::LoadFunctionIRs() { - if (!functions_loaded_) { + if (!mandatory_functions_loaded_) { + ARROW_RETURN_NOT_OK(LoadMandatoryPreCompiledIR()); + mandatory_functions_loaded_ = true; + } + + bool is_ir_function_used = used_functions_.size() > used_c_functions_.size(); + if (!functions_loaded_ && is_ir_function_used) { ARROW_RETURN_NOT_OK(LoadPreCompiledIR()); ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this)); ARROW_RETURN_NOT_OK(LoadExternalPreCompiledIR()); @@ -315,22 +319,42 @@ Status Engine::LoadFunctionIRs() { return Status::OK(); } +class NoOpObjectCache : public llvm::ObjectCache { + public: + void notifyObjectCompiled(const llvm::Module* M, + llvm::MemoryBufferRef ObjBuffer) override {} + + std::unique_ptr getObject(const llvm::Module* M) override { + return nullptr; + } +}; + +llvm::ObjectCache* DefaultObjectCache() { + static NoOpObjectCache no_op_object_cache; + return &no_op_object_cache; +} + /// factory method to construct the engine. -Result> Engine::Make( - const std::shared_ptr& conf, bool cached, - std::optional> object_cache) { +Result> Engine::Make(const std::shared_ptr& conf, + bool cached, + llvm::ObjectCache* object_cache) { + if (object_cache == nullptr) { + object_cache = DefaultObjectCache(); + } + std::call_once(llvm_init_once_flag, InitOnce); ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf)); - ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache)); auto maybe_tm = jtmb.createTargetMachine(); ARROW_ASSIGN_OR_RAISE(auto target_machine, AsArrowResult(maybe_tm, "Could not create target machine: ")); + auto target_ir_analysis = target_machine->getTargetIRAnalysis(); + ARROW_ASSIGN_OR_RAISE(auto jit, + BuildJIT(jtmb, std::move(target_machine), object_cache)); std::unique_ptr engine{ - new Engine(conf, std::move(jit), std::move(target_machine), cached)}; + new Engine(conf, std::move(jit), std::move(target_ir_analysis), cached)}; - ARROW_RETURN_NOT_OK(engine->Init()); return engine; } @@ -360,14 +384,11 @@ llvm::Module* Engine::module() { return module_.get(); } -// Handling for pre-compiled IR libraries. -Status Engine::LoadPreCompiledIR() { - auto const bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), - kPrecompiledBitcodeSize); - +Status LoadIR(const std::string& name, llvm::StringRef bitcode, llvm::Module& module, + llvm::LLVMContext& context) { /// Read from file into memory buffer. llvm::ErrorOr> buffer_or_error = - llvm::MemoryBuffer::getMemBuffer(bitcode, "precompiled", false); + llvm::MemoryBuffer::getMemBuffer(bitcode, name, false); ARROW_RETURN_IF(!buffer_or_error, Status::CodeGenError("Could not load module from IR: ", @@ -377,13 +398,27 @@ Status Engine::LoadPreCompiledIR() { /// Parse the IR module. llvm::Expected> module_or_error = - llvm::getOwningLazyBitcodeModule(std::move(buffer), *context()); + llvm::getOwningLazyBitcodeModule(std::move(buffer), context); // NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds // (ARROW-5148) - ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_, std::move(module_or_error))); + ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module, std::move(module_or_error))); return Status::OK(); } +// Handling for pre-compiled IR libraries. +Status Engine::LoadPreCompiledIR() { + auto const bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), + kPrecompiledBitcodeSize); + return LoadIR("precompiled", bitcode, *module_, *context()); +} + +Status Engine::LoadMandatoryPreCompiledIR() { + auto const bitcode = + llvm::StringRef(reinterpret_cast(kPrecompiledMandatoryBitcode), + kPrecompiledMandatoryBitcodeSize); + return LoadIR("mandatory_precompiled", bitcode, *module_, *context()); +} + static llvm::MemoryBufferRef AsLLVMMemoryBuffer(const arrow::Buffer& arrow_buffer) { auto const data = reinterpret_cast(arrow_buffer.data()); auto const size = arrow_buffer.size(); @@ -501,12 +536,11 @@ Status Engine::FinalizeModule() { ARROW_RETURN_NOT_OK(RemoveUnusedFunctions()); if (optimize_) { - auto target_analysis = target_machine_->getTargetIRAnalysis(); // misc passes to allow for inlining, vectorization, .. #if LLVM_VERSION_MAJOR >= 14 - OptimizeModuleWithNewPassManager(*module_, std::move(target_analysis)); + OptimizeModuleWithNewPassManager(*module_, std::move(target_ir_analysis_)); #else - OptimizeModuleWithLegacyPassManager(*module_, std::move(target_analysis)); + OptimizeModuleWithLegacyPassManager(*module_, std::move(target_ir_analysis_)); #endif } @@ -556,15 +590,25 @@ Result Engine::CompiledFunction(const std::string& function) { void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, const std::vector& args, void* func) { - auto const prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); - llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module()); - AddAbsoluteSymbol(*lljit_, name, func); + // if the function is not used, don't add it to the module for better performance + bool is_internal_func = internal_functions_.find(name) != internal_functions_.end(); + if (is_internal_func || used_functions_.find(name) != used_functions_.end()) { + if (!is_internal_func) { + used_c_functions_.emplace(name); + } + auto const prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); + llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module()); + AddAbsoluteSymbol(lljit_->getMainJITDylib(), *mangle_, name, func); + } } arrow::Status Engine::AddGlobalMappings() { - ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this)); - ExternalCFunctions c_funcs(function_registry_); - return c_funcs.AddMappings(this); + if (!cached_) { + ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this)); + ExternalCFunctions c_funcs(function_registry_); + return c_funcs.AddMappings(this); + } + return Status::OK(); } const std::string& Engine::ir() { diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 565c3f14250..f0319047507 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -26,6 +26,11 @@ #include #include +#if LLVM_VERSION_MAJOR >= 11 +#include +#else +#include +#endif #include "arrow/util/logging.h" #include "arrow/util/macros.h" @@ -57,12 +62,12 @@ class GANDIVA_EXPORT Engine { /// /// \param[in] config the engine configuration /// \param[in] cached flag to mark if the module is already compiled and cached - /// \param[in] object_cache an optional object_cache used for building the module + /// \param[in] object_cache an optional object_cache used for building the module, if + /// not provided, no caching is done /// \return arrow::Result containing the created engine static Result> Make( const std::shared_ptr& config, bool cached, - std::optional> object_cache = - std::nullopt); + llvm::ObjectCache* object_cache = NULLPTR); /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. @@ -74,8 +79,8 @@ class GANDIVA_EXPORT Engine { /// Optimise and compile the module. Status FinalizeModule(); - /// Set LLVM ObjectCache. - Status SetLLVMObjectCache(GandivaObjectCache& object_cache); + /// Set cached LLVM ObjectCode + Status SetCachedObjectCode(std::unique_ptr cached_buffer); /// Get the compiled function corresponding to the irfunction. Result CompiledFunction(const std::string& function); @@ -90,13 +95,15 @@ class GANDIVA_EXPORT Engine { /// Load the function IRs that can be accessed in the module. Status LoadFunctionIRs(); + /// Post construction init. This _must_ be called after the constructor. + /// @param[in] used_functions set of function names that are expected to be used by the + /// engine + Status Init(std::unordered_set used_functions); + private: Engine(const std::shared_ptr& conf, std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached); - - // Post construction init. This _must_ be called after the constructor. - Status Init(); + llvm::TargetIRAnalysis target_is_analysis, bool cached); static void InitOnce(); @@ -104,6 +111,10 @@ class GANDIVA_EXPORT Engine { /// the main module. Status LoadPreCompiledIR(); + /// load mandatory pre-compiled IR modules from precompiled_bitcode.cc and merge them + /// into the main module. Mandatory IR includes functions manipulating bitmaps + Status LoadMandatoryPreCompiledIR(); + // load external pre-compiled bitcodes into module Status LoadExternalPreCompiledIR(); @@ -115,19 +126,44 @@ class GANDIVA_EXPORT Engine { std::unique_ptr context_; std::unique_ptr lljit_; + std::unique_ptr mangle_; std::unique_ptr> ir_builder_; std::unique_ptr module_; LLVMTypes types_; std::vector functions_to_compile_; + std::unordered_set used_functions_; + std::unordered_set used_c_functions_; + + // all internally used C stub functions and IR function names + static inline const std::unordered_set internal_functions_ = { + // internal C stub functions + "gdv_fn_context_arena_malloc", + "gdv_fn_context_set_error_msg", + "gdv_fn_populate_varlen_vector", + "gdv_fn_context_arena_reset", + "gdv_fn_in_expr_lookup_int32", + "gdv_fn_in_expr_lookup_int64", + "gdv_fn_in_expr_lookup_float", + "gdv_fn_in_expr_lookup_double", + "gdv_fn_in_expr_lookup_decimal", + "gdv_fn_in_expr_lookup_utf8", + // internal IR functions + "bitMapGetBit", + "bitMapSetBit", + "bitMapValidityGetBit", + "bitMapClearBitIfFalse", + }; bool optimize_ = true; bool module_finalized_ = false; bool cached_; bool functions_loaded_ = false; + bool mandatory_functions_loaded_ = false; std::shared_ptr function_registry_; std::string module_ir_; std::unique_ptr target_machine_; + llvm::TargetIRAnalysis target_ir_analysis_; const std::shared_ptr conf_; }; diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index df8eed5fd63..9fdfe089735 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -69,6 +69,7 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) { const NativeFunction* native_function = registry_.LookupSignature(signature); DCHECK(native_function) << "Missing Signature " << signature.ToString(); + used_functions_.emplace(native_function->pc_name()); // decompose the children. std::vector args; for (auto& child : node.children()) { diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h index 90a27744b36..e3de40d62b5 100644 --- a/cpp/src/gandiva/expr_decomposer.h +++ b/cpp/src/gandiva/expr_decomposer.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "gandiva/arrow.h" @@ -49,6 +50,10 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor { return status; } + [[nodiscard]] const std::unordered_set& UsedFunctions() const { + return used_functions_; + } + private: ARROW_DISALLOW_COPY_AND_ASSIGN(ExprDecomposer); @@ -125,6 +130,7 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor { Annotator& annotator_; std::stack> if_entries_stack_; ValueValidityPairPtr result_; + std::unordered_set used_functions_; bool nested_if_else_; }; diff --git a/cpp/src/gandiva/expr_decomposer_test.cc b/cpp/src/gandiva/expr_decomposer_test.cc index 194c13bc82c..b954aee7ab9 100644 --- a/cpp/src/gandiva/expr_decomposer_test.cc +++ b/cpp/src/gandiva/expr_decomposer_test.cc @@ -19,11 +19,13 @@ #include +#include "arrow/testing/gtest_util.h" #include "gandiva/annotator.h" #include "gandiva/dex.h" #include "gandiva/function_registry.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/node.h" +#include "gandiva/tree_expr_builder.h" namespace gandiva { @@ -405,4 +407,34 @@ TEST_F(TestExprDecomposer, TestComplexIfCondition) { EXPECT_TRUE(decomposer.if_entries_stack_.empty()); } +TEST_F(TestExprDecomposer, TestGetUsedFunctionsInExpr) { + Annotator annotator; + ExprDecomposer decomposer(*registry_, annotator); + auto field0 = field("f0", int32()); + auto f0 = std::make_shared(field0); + auto is_not_null_func = TreeExprBuilder::MakeFunction("isnotnull", {f0}, boolean()); + ValueValidityPairPtr value_validity; + ASSERT_OK(decomposer.Decompose(*is_not_null_func, &value_validity)); + auto used_functions = decomposer.UsedFunctions(); + ASSERT_EQ(used_functions.size(), 1); + ASSERT_EQ(used_functions.find("isnotnull_int32") != used_functions.end(), true); +} + +TEST_F(TestExprDecomposer, TestGetMultipleUsedFunctionsInExpr) { + Annotator annotator; + ExprDecomposer decomposer(*registry_, annotator); + auto field0 = field("f0", int32()); + auto field1 = field("f1", int32()); + auto f0 = std::make_shared(field0); + auto f1 = std::make_shared(field1); + auto add_func = TreeExprBuilder::MakeFunction("add", {f0, f1}, int32()); + auto is_not_null_func = + TreeExprBuilder::MakeFunction("isnotnull", {add_func}, boolean()); + ValueValidityPairPtr value_validity; + ASSERT_OK(decomposer.Decompose(*is_not_null_func, &value_validity)); + auto used_functions = decomposer.UsedFunctions(); + ASSERT_EQ(used_functions.size(), 2); + ASSERT_EQ(used_functions.find("add_int32_int32") != used_functions.end(), true); + ASSERT_EQ(used_functions.find("isnotnull_int32") != used_functions.end(), true); +} } // namespace gandiva diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index bcef954a473..34ea1d17aa3 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -850,19 +850,6 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_fn_dec_from_string)); - // gdv_fn_dec_to_string - args = { - types->i64_type(), // context - types->i64_type(), // int64_t x_high - types->i64_type(), // int64_t x_low - types->i32_type(), // int32_t x_scale - types->i64_ptr_type(), // int64_t* dec_str_len - }; - - engine->AddGlobalMappingForFunc("gdv_fn_dec_to_string", - types->i8_ptr_type() /*return_type*/, args, - reinterpret_cast(gdv_fn_dec_to_string)); - // gdv_fn_in_expr_lookup_int32 args = {types->i64_type(), // int64_t in holder ptr types->i32_type(), // int32 value diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 62ebab08f4d..8b15d4db108 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -48,8 +48,10 @@ Result> LLVMGenerator::Make( std::unique_ptr llvm_generator( new LLVMGenerator(cached, config->function_registry())); - ARROW_ASSIGN_OR_RAISE(llvm_generator->engine_, - Engine::Make(config, cached, object_cache)); + ARROW_ASSIGN_OR_RAISE( + llvm_generator->engine_, + Engine::Make(config, cached, + object_cache.has_value() ? &object_cache.value().get() : nullptr)); return llvm_generator; } @@ -63,15 +65,24 @@ LLVMGenerator::GetCache() { } Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { - return engine_->SetLLVMObjectCache(object_cache); + auto cached_buffer = object_cache.getObject(nullptr); + return engine_->SetCachedObjectCode(std::move(cached_buffer)); } -Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr output) { - int idx = static_cast(compiled_exprs_.size()); +arrow::Result LLVMGenerator::Decompose(const ExpressionPtr& expr) { // decompose the expression to separate out value and validities. ExprDecomposer decomposer(*function_registry_, annotator_); ValueValidityPairPtr value_validity; ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); + + auto& used_functions = decomposer.UsedFunctions(); + functions_in_exprs_.insert(used_functions.begin(), used_functions.end()); + return value_validity; +} + +Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, + const FieldDescriptorPtr output) { + int idx = static_cast(compiled_exprs_.size()); // Generate the IR function for the decomposed expression. auto compiled_expr = std::make_unique(value_validity, output); std::string fn_name = "expr_" + std::to_string(idx) + "_" + @@ -92,9 +103,19 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode mode) { selection_vector_mode_ = mode; + std::vector expr_value_validities; for (auto& expr : exprs) { + ARROW_ASSIGN_OR_RAISE(auto value_validity, Decompose(expr)); + expr_value_validities.push_back(value_validity); + } + + ARROW_RETURN_NOT_OK(engine_->Init(std::move(functions_in_exprs_))); + + for (size_t i = 0; i < exprs.size(); ++i) { + const auto& expr = exprs[i]; auto output = annotator_.AddOutputFieldDescriptor(expr->result()); - ARROW_RETURN_NOT_OK(Add(expr, output)); + auto value_validity = expr_value_validities[i]; + ARROW_RETURN_NOT_OK(Add(expr, std::move(value_validity), output)); } // Compile and inject into the process' memory the generated function. diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 0c532998e8b..9fa5ad35742 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -184,9 +184,12 @@ class GANDIVA_EXPORT LLVMGenerator { bool has_arena_allocs_; }; + arrow::Result Decompose(const ExpressionPtr& expr); + // Generate the code for one expression for default mode, with the output of // the expression going to 'output'. - Status Add(const ExpressionPtr expr, const FieldDescriptorPtr output); + Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, + const FieldDescriptorPtr output); /// Generate code to load the vector at specified index in the 'arg_addrs' array. llvm::Value* LoadVectorAtIndex(llvm::Value* arg_addrs, llvm::Type* type, int idx, @@ -263,6 +266,7 @@ class GANDIVA_EXPORT LLVMGenerator { // used for debug bool enable_ir_traces_; std::vector trace_strings_; + std::unordered_set functions_in_exprs_; }; } // namespace gandiva diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 79654e7b78c..10e60c29240 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -49,6 +49,10 @@ class TestLLVMGenerator : public ::testing::Test { ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); + std::unordered_set used_functions; + used_functions.insert(function_name); + ASSERT_OK(generator->engine_->Init(used_functions)); + auto module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); EXPECT_NE(module->getFunction(function_name), nullptr); @@ -59,6 +63,11 @@ class TestLLVMGenerator : public ::testing::Test { TEST_F(TestLLVMGenerator, VerifyPCFunctions) { ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); + std::unordered_set used_functions; + for (auto& iter : *registry_) { + used_functions.insert(iter.pc_name()); + } + ASSERT_OK(generator->engine_->Init(used_functions)); llvm::Module* module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); for (auto& iter : *registry_) { @@ -102,6 +111,10 @@ TEST_F(TestLLVMGenerator, TestAdd) { // LLJIT is used std::string fn_name = "llvm_gen_test_add_expr"; + std::unordered_set used_functions{"add_int32_int32"}; + + ASSERT_OK(generator->engine_->Init(used_functions)); + ASSERT_OK(generator->engine_->LoadFunctionIRs()); ASSERT_OK(generator->CodeGenExprValue(func_dex, 4, desc_sum, 0, fn_name, SelectionVector::MODE_NONE)); diff --git a/cpp/src/gandiva/make_precompiled_bitcode.py b/cpp/src/gandiva/make_precompiled_bitcode.py index 97d96f8a878..c515748232a 100644 --- a/cpp/src/gandiva/make_precompiled_bitcode.py +++ b/cpp/src/gandiva/make_precompiled_bitcode.py @@ -19,7 +19,6 @@ import sys -marker = b"" def expand(data): """ @@ -29,21 +28,26 @@ def expand(data): return expanded_data.encode('ascii') -def apply_template(template, data): +def apply_template(template, marker, data): if template.count(marker) != 1: raise ValueError("Invalid template") return template.replace(marker, expand(data)) +def read_file(filepath): + with open(filepath, "rb") as file: + return file.read() if __name__ == "__main__": - if len(sys.argv) != 4: - raise ValueError("Usage: {0}