diff --git a/src/CodeGen_Hexagon.cpp b/src/CodeGen_Hexagon.cpp index 1103c18d973b..3e2893f11d1e 100644 --- a/src/CodeGen_Hexagon.cpp +++ b/src/CodeGen_Hexagon.cpp @@ -8,6 +8,7 @@ #include "AlignLoads.h" #include "CSE.h" #include "CodeGen_Internal.h" +#include "CodeGen_Posix.h" #include "Debug.h" #include "HexagonOptimize.h" #include "IREquality.h" @@ -32,11 +33,111 @@ using std::vector; using namespace llvm; +#ifdef WITH_HEXAGON + +namespace { + +/** A code generator that emits Hexagon code from a given Halide stmt. */ +class CodeGen_Hexagon : public CodeGen_Posix { +public: + /** Create a Hexagon code generator for the given Hexagon target. */ + CodeGen_Hexagon(Target); + +protected: + void compile_func(const LoweredFunc &f, + const std::string &simple_name, const std::string &extern_name) override; + + void init_module() override; + + std::string mcpu() const override; + std::string mattrs() const override; + int isa_version; + bool use_soft_float_abi() const override; + int native_vector_bits() const override; + + llvm::Function *define_hvx_intrinsic(llvm::Function *intrin, Type ret_ty, + const std::string &name, + std::vector arg_types, + int flags); + + int is_hvx_v65_or_later() const { + return (isa_version >= 65); + } + + using CodeGen_Posix::visit; + + /** Nodes for which we want to emit specific hexagon intrinsics */ + ///@{ + void visit(const Max *) override; + void visit(const Min *) override; + void visit(const Call *) override; + void visit(const Mul *) override; + void visit(const Select *) override; + void visit(const Allocate *) override; + ///@} + + /** We ask for an extra vector on each allocation to enable fast + * clamped ramp loads. */ + int allocation_padding(Type type) const override { + return CodeGen_Posix::allocation_padding(type) + native_vector_bits() / 8; + } + + /** Call an LLVM intrinsic, potentially casting the operands to + * match the type of the function. */ + ///@{ + llvm::Value *call_intrin_cast(llvm::Type *ret_ty, llvm::Function *F, + std::vector Ops); + llvm::Value *call_intrin_cast(llvm::Type *ret_ty, int id, + std::vector Ops); + ///@} + + /** Define overloads of CodeGen_LLVM::call_intrin that determine + * the intrin_lanes from the type, and allows the function to + * return null if the maybe option is true and the intrinsic is + * not found. */ + ///@{ + using CodeGen_LLVM::call_intrin; + llvm::Value *call_intrin(Type t, const std::string &name, + std::vector, bool maybe = false); + llvm::Value *call_intrin(llvm::Type *t, const std::string &name, + std::vector, bool maybe = false); + ///@} + + /** Override CodeGen_LLVM to use hexagon intrinics when possible. */ + ///@{ + llvm::Value *interleave_vectors(const std::vector &v) override; + llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b, + const std::vector &indices) override; + using CodeGen_Posix::shuffle_vectors; + ///@} + + /** Generate a LUT lookup using vlut instructions. */ + ///@{ + llvm::Value *vlut(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 1 << 30); + llvm::Value *vlut(llvm::Value *lut, const std::vector &indices); + ///@} + + llvm::Value *vdelta(llvm::Value *lut, const std::vector &indices); + + /** Because HVX intrinsics operate on vectors of i32, using them + * requires a lot of extraneous bitcasts, which make it difficult + * to manipulate the IR. This function avoids generating redundant + * bitcasts. */ + llvm::Value *create_bitcast(llvm::Value *v, llvm::Type *ty); + +private: + /** Generates code for computing the size of an allocation from a + * list of its extents and its size. Fires a runtime assert + * (halide_error) if the size overflows 2^31 -1, the maximum + * positive number an int32_t can hold. */ + llvm::Value *codegen_cache_allocation_size(const std::string &name, Type type, const std::vector &extents); + + /** Generate a LUT (8/16 bit, max_index < 256) lookup using vlut instructions. */ + llvm::Value *vlut256(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 255); +}; + CodeGen_Hexagon::CodeGen_Hexagon(Target t) : CodeGen_Posix(t) { -#if !defined(WITH_HEXAGON) - user_error << "hexagon not enabled for this build of Halide.\n"; -#endif user_assert(llvm_Hexagon_enabled) << "llvm build not configured with Hexagon target enabled.\n"; if (target.has_feature(Halide::Target::HVX_v66)) { @@ -50,8 +151,6 @@ CodeGen_Hexagon::CodeGen_Hexagon(Target t) << "Creating a Codegen target for Hexagon without the hvx target feature.\n"; } -namespace { - Stmt call_halide_qurt_hvx_lock(const Target &target) { Expr hvx_lock = Call::make(Int(32), "halide_qurt_hvx_lock", {}, Call::Extern); @@ -420,8 +519,6 @@ Stmt inject_hvx_lock_unlock(Stmt body, const Target &target) { return body; } -} // namespace - void CodeGen_Hexagon::compile_func(const LoweredFunc &f, const string &simple_name, const string &extern_name) { @@ -489,8 +586,6 @@ void CodeGen_Hexagon::compile_func(const LoweredFunc &f, CodeGen_Posix::end_func(f.args); } -namespace { - struct HvxIntrinsic { enum { BroadcastScalarsToWords = 1 << 0, // Some intrinsics need scalar arguments @@ -788,8 +883,6 @@ const HvxIntrinsic intrinsic_wrappers[] = { // need to be implemented in the runtime module, or via // fall-through to CodeGen_LLVM. -} // namespace - void CodeGen_Hexagon::init_module() { CodeGen_Posix::init_module(); @@ -1022,8 +1115,6 @@ Value *CodeGen_Hexagon::interleave_vectors(const vector &v) { return CodeGen_Posix::interleave_vectors(v); } -namespace { - // Check if indices form a strided ramp, allowing undef elements to // pretend to be part of the ramp. bool is_strided_ramp(const vector &indices, int &start, int &stride) { @@ -1086,8 +1177,6 @@ bool is_concat_or_slice(const vector &indices) { return true; } -} // namespace - Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b, const vector &indices) { llvm::Type *a_ty = a->getType(); @@ -1559,7 +1648,7 @@ Value *CodeGen_Hexagon::vdelta(Value *lut, const vector &indices) { return vlut(lut, indices); } -static Value *create_vector(llvm::Type *ty, int val) { +Value *create_vector(llvm::Type *ty, int val) { llvm::Type *scalar_ty = ty->getScalarType(); Constant *value = ConstantInt::get(scalar_ty, val); return ConstantVector::getSplat(element_count(get_vector_num_elements(ty)), value); @@ -1755,8 +1844,6 @@ int CodeGen_Hexagon::native_vector_bits() const { return 128 * 8; } -namespace { - Expr maybe_scalar(Expr x) { const Broadcast *xb = x.as(); if (xb) { @@ -1766,8 +1853,6 @@ Expr maybe_scalar(Expr x) { } } -} // namespace - void CodeGen_Hexagon::visit(const Mul *op) { if (op->type.is_vector()) { value = @@ -2225,5 +2310,21 @@ void CodeGen_Hexagon::visit(const Allocate *alloc) { } } +} // namespace + +CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) { + CodeGen_Hexagon *ret = new CodeGen_Hexagon(target); + ret->set_context(context); + return ret; +} + +#else // WITH_HEXAGON + +CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context) { + user_error << "hexagon not enabled for this build of Halide.\n"; +} + +#endif // WITH_HEXAGON + } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_Hexagon.h b/src/CodeGen_Hexagon.h index a4f2c93a6185..f1dc98937a23 100644 --- a/src/CodeGen_Hexagon.h +++ b/src/CodeGen_Hexagon.h @@ -5,109 +5,21 @@ * Defines the code-generator for producing Hexagon machine code */ -#include "CodeGen_Posix.h" +namespace llvm { -namespace Halide { -namespace Internal { - -/** A code generator that emits Hexagon code from a given Halide stmt. */ -class CodeGen_Hexagon : public CodeGen_Posix { -public: - /** Create a Hexagon code generator for the given Hexagon target. */ - CodeGen_Hexagon(Target); - -protected: - void compile_func(const LoweredFunc &f, - const std::string &simple_name, const std::string &extern_name) override; - - void init_module() override; - - std::string mcpu() const override; - std::string mattrs() const override; - int isa_version; - bool use_soft_float_abi() const override; - int native_vector_bits() const override; - - llvm::Function *define_hvx_intrinsic(llvm::Function *intrin, Type ret_ty, - const std::string &name, - std::vector arg_types, - int flags); - - int is_hvx_v65_or_later() const { - return (isa_version >= 65); - } +class LLVMContext; - using CodeGen_Posix::visit; +} - /** Nodes for which we want to emit specific hexagon intrinsics */ - ///@{ - void visit(const Max *) override; - void visit(const Min *) override; - void visit(const Call *) override; - void visit(const Mul *) override; - void visit(const Select *) override; - void visit(const Allocate *) override; - ///@} - - /** We ask for an extra vector on each allocation to enable fast - * clamped ramp loads. */ - int allocation_padding(Type type) const override { - return CodeGen_Posix::allocation_padding(type) + native_vector_bits() / 8; - } - - /** Call an LLVM intrinsic, potentially casting the operands to - * match the type of the function. */ - ///@{ - llvm::Value *call_intrin_cast(llvm::Type *ret_ty, llvm::Function *F, - std::vector Ops); - llvm::Value *call_intrin_cast(llvm::Type *ret_ty, int id, - std::vector Ops); - ///@} - - /** Define overloads of CodeGen_LLVM::call_intrin that determine - * the intrin_lanes from the type, and allows the function to - * return null if the maybe option is true and the intrinsic is - * not found. */ - ///@{ - using CodeGen_LLVM::call_intrin; - llvm::Value *call_intrin(Type t, const std::string &name, - std::vector, bool maybe = false); - llvm::Value *call_intrin(llvm::Type *t, const std::string &name, - std::vector, bool maybe = false); - ///@} - - /** Override CodeGen_LLVM to use hexagon intrinics when possible. */ - ///@{ - llvm::Value *interleave_vectors(const std::vector &v) override; - llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b, - const std::vector &indices) override; - using CodeGen_Posix::shuffle_vectors; - ///@} - - /** Generate a LUT lookup using vlut instructions. */ - ///@{ - llvm::Value *vlut(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 1 << 30); - llvm::Value *vlut(llvm::Value *lut, const std::vector &indices); - ///@} +namespace Halide { - llvm::Value *vdelta(llvm::Value *lut, const std::vector &indices); +struct Target; - /** Because HVX intrinsics operate on vectors of i32, using them - * requires a lot of extraneous bitcasts, which make it difficult - * to manipulate the IR. This function avoids generating redundant - * bitcasts. */ - llvm::Value *create_bitcast(llvm::Value *v, llvm::Type *ty); +namespace Internal { -private: - /** Generates code for computing the size of an allocation from a - * list of its extents and its size. Fires a runtime assert - * (halide_error) if the size overflows 2^31 -1, the maximum - * positive number an int32_t can hold. */ - llvm::Value *codegen_cache_allocation_size(const std::string &name, Type type, const std::vector &extents); +class CodeGen_Posix; - /** Generate a LUT (8/16 bit, max_index < 256) lookup using vlut instructions. */ - llvm::Value *vlut256(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 255); -}; +CodeGen_Posix *new_CodeGen_Hexagon(const Target &target, llvm::LLVMContext &context); } // namespace Internal } // namespace Halide diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index 37211d1f3384..15c16144ef27 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -373,7 +373,7 @@ CodeGen_LLVM *CodeGen_LLVM::new_for_target(const Target &target, } else if (target.arch == Target::POWERPC) { return make_codegen(target, context); } else if (target.arch == Target::Hexagon) { - return make_codegen(target, context); + return new_CodeGen_Hexagon(target, context); } else if (target.arch == Target::WebAssembly) { return make_codegen(target, context); } else if (target.arch == Target::RISCV) {